Index: org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFFeatureWriter.java =================================================================== --- org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFFeatureWriter.java (revisión: 42991) +++ org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFFeatureWriter.java (copia de trabajo) @@ -37,6 +37,7 @@ import org.gvsig.fmap.dal.feature.Feature; import org.gvsig.fmap.dal.feature.FeatureType; import org.gvsig.fmap.dal.feature.exception.AttributeFeatureTypeNotSuportedException; +import org.gvsig.fmap.dal.store.dbf.utils.DbaseCodepage; import org.gvsig.fmap.dal.store.dbf.utils.DbaseFileHeader; import org.gvsig.fmap.dal.store.dbf.utils.DbaseFileWriter; @@ -55,14 +56,25 @@ public void begin(DBFStoreParameters storeParameters, FeatureType featureType, long numRows) throws DataException { - // TODO if is new set the langID + String charset = storeParameters.getEffectiveEncodingName(); + if (charset==null) { + // set a safe encoding in case none has been defined + charset = "UTF-8"; + } try { - myHeader = DbaseFileHeader.createDbaseHeader(featureType); + + myHeader = DbaseFileHeader.createDbaseHeader(featureType, charset); } catch (AttributeFeatureTypeNotSuportedException e1) { throw new WriteException(this.name, e1); } dbfFile = storeParameters.getDBFFile(); + + // .cpg will be redundant if LDID was already set, but we still + // want to write it with the hopes of increasing the range of programs + // that will correctly interpret the charset + DbaseCodepage cpWriter = new DbaseCodepage(dbfFile); + cpWriter.write(charset); dbfChannel = null; @@ -74,9 +86,6 @@ } try{ this.dbfWriter = new DbaseFileWriter(myHeader, dbfChannel, true); - - - 
this.dbfWriter.setCharset(Charset.forName("ISO-8859-1")); } catch (InitializeException e) { throw new WriteException(this.name, e); } @@ -108,8 +117,6 @@ File f = new File(path); if (!f.exists()) { - // System.out.println("Creando fichero " + f.getAbsolutePath()); - if (!f.createNewFile()) { System.err.print("Error al crear el fichero " + f.getAbsolutePath()); Index: org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFLibrary.java =================================================================== --- org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFLibrary.java (revisión: 42991) +++ org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFLibrary.java (copia de trabajo) @@ -43,6 +43,8 @@ import org.gvsig.fmap.dal.resource.spi.ResourceManagerProviderServices; import org.gvsig.fmap.dal.spi.DataManagerProviderServices; import org.gvsig.fmap.dal.store.dbf.utils.DbaseFile; +import org.gvsig.metadata.MetadataLocator; +import org.gvsig.metadata.MetadataManager; import org.gvsig.metadata.exceptions.MetadataException; import org.gvsig.tools.ToolsLocator; import org.gvsig.tools.dynobject.DynObjectValueItem; @@ -71,7 +73,7 @@ DBFStoreParameters.class, "DBFParameters.xml" ); - updateEncodingDefinition(); + FileHelper.registerParametersDefinition( DBFNewStoreParameters.PARAMETERS_DEFINITION_NAME, DBFNewStoreParameters.class, @@ -86,7 +88,8 @@ } catch (MetadataException e) { exs.add(e); } - + updateEncodingDefinition(); + DataManagerProviderServices dataman = (DataManagerProviderServices) DALLocator .getDataManager(); @@ -114,18 +117,24 @@ private static void updateEncodingDefinition() { DynStruct parametersDefinition = 
ToolsLocator.getPersistenceManager().getDefinition(DBFStoreParameters.PARAMETERS_DEFINITION_NAME); - - DynObjectValueItem[] values = parametersDefinition.getDynField("encoding").getAvailableValues(); - + DynObjectValueItem[] values = parametersDefinition.getDynField("Encoding").getAvailableValues(); Set charsetSet = new LinkedHashSet(160); - charsetSet.addAll( Arrays.asList(values) ); + + MetadataManager metadataManager = MetadataLocator.getMetadataManager(); + DynStruct dynStruct = metadataManager.getDefinition(DBFStoreProvider.METADATA_DEFINITION_NAME); + DynObjectValueItem[] metadataValues = dynStruct.getDynField("Encoding").getAvailableValues(); + Set metadataCharsetSets = new LinkedHashSet(160); + metadataCharsetSets.addAll( Arrays.asList(metadataValues) ); + Map charsets = Charset.availableCharsets(); Iterator iter = charsets.keySet().iterator(); while (iter.hasNext()){ String value = (String) iter.next(); String label= value; - charsetSet.add(new DynObjectValueItem(value, label)); + DynObjectValueItem item = new DynObjectValueItem(value, label); + charsetSet.add(item); + metadataCharsetSets.add(item); } parametersDefinition.getDynField("Encoding") @@ -132,7 +141,11 @@ .setAvailableValues( (DynObjectValueItem[]) charsetSet.toArray(new DynObjectValueItem[charsets.size()]) ); + dynStruct.getDynField("Encoding") + .setAvailableValues( + (DynObjectValueItem[]) metadataCharsetSets.toArray( + new DynObjectValueItem[metadataCharsetSets.size()]) + ); - } } Index: org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFStoreParameters.java =================================================================== --- org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFStoreParameters.java (revisión: 42991) +++ 
org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFStoreParameters.java (copia de trabajo) @@ -29,13 +29,15 @@ import java.util.Locale; import org.apache.commons.lang3.BooleanUtils; import org.apache.commons.lang3.StringUtils; - +import org.gvsig.fmap.dal.DataParameters; import org.gvsig.fmap.dal.FileHelper; import org.gvsig.fmap.dal.OpenDataStoreParameters; +import org.gvsig.fmap.dal.exception.CopyParametersException; import org.gvsig.fmap.dal.exception.ValidateDataParametersException; import org.gvsig.fmap.dal.serverexplorer.filesystem.FilesystemStoreParameters; import org.gvsig.fmap.dal.spi.AbstractDataParameters; import org.gvsig.fmap.dal.spi.DataStoreProviderServices; +import org.gvsig.fmap.dal.store.dbf.utils.DbaseCodepage; import org.gvsig.tools.dynobject.DelegatedDynObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,6 +56,9 @@ public static final String DATE_FORMAT = "dateFormat"; public static final String LOCALE = "locale"; public static final String ALLOW_DUPLICATED_FIELD_NAMES = "allowDuplicatedFieldNames"; + // We don't want to persist or show effective encoding in the store parameters dialog + // But we still need it to properly encode layers when editing!! 
+ private String effectiveEncoding = null; private DelegatedDynObject parameters; @@ -109,7 +114,16 @@ public void setDBFFile(File file) { this.setDynValue(DBFFILE_PARAMTER_NAME, file); } + + public String getCPGFileName() { + return DbaseCodepage.getCpgFileName(getDBFFileName()); + } + + public File getCPGFile() { + return new File(DbaseCodepage.getCpgFileName(getDBFFileName())); + } + public void setDBFFile(String fileName) { this.setDynValue(DBFFILE_PARAMTER_NAME, fileName); } @@ -132,10 +146,43 @@ } return Charset.forName(name); } + /** + * Returns the name of the encoding actually used to read/write the dbf + * + * @return the effective encoding name (trimmed), or the value of getEncodingName() if no effective encoding has been set + */ + public String getEffectiveEncodingName() { + if (effectiveEncoding==null) { + return getEncodingName(); + } + return effectiveEncoding.trim(); + } + + /** + * Returns the encoding actually used to read/write the dbf + * + * @return the effective charset, or null if no encoding name is available + */ + public Charset getEffectiveEncoding() { + String name = getEffectiveEncodingName(); + if (name==null) { + return null; + } + return Charset.forName(name); + } public void setEncoding(String encoding) { this.setEncoding(Charset.forName(encoding)); } + + /** + * Sets the encoding actually used to read/write the dbf + * + * @param encoding + */ + public void setEffectiveEncoding(String encoding) { + this.effectiveEncoding = encoding; + } public boolean handleDatesAsStrings() { Boolean x = (Boolean) getDynValue(HANDLE_DATES_AS_STRINGS); @@ -150,6 +197,25 @@ public void setEncoding(Charset charset) { this.setDynValue(ENCODING_PARAMTER_NAME, charset.name()); } + + /** + * Sets the encoding actually used to read/write the dbf + * + * @param charset + */ + public void setEffectiveEncoding(Charset charset) { + this.effectiveEncoding = charset.name(); + } + + @Override + public DataParameters getCopy() { + DataParameters copy = super.getCopy(); + if (copy instanceof DBFStoreParameters) { + DBFStoreParameters dbfParams = (DBFStoreParameters) copy; + dbfParams.setEffectiveEncoding(this.effectiveEncoding); + } + return copy; + } 
protected DelegatedDynObject getDelegatedDynObject() { return parameters; Index: org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFStoreProvider.java =================================================================== --- org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFStoreProvider.java (revisión: 42991) +++ org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/DBFStoreProvider.java (copia de trabajo) @@ -179,7 +179,7 @@ if (METADATA_ENCODING.equalsIgnoreCase(name)) { return this.dbfFile.getOriginalCharset(); } else if (METADATA_CODEPAGE.equalsIgnoreCase(name)) { - return new Byte(this.dbfFile.getCodePage()); + return new Integer(this.dbfFile.getCodePageInt()); } return super.getDynValue(name); } @@ -690,6 +690,8 @@ protected void openFile() throws FileNotFoundException, UnsupportedVersionException, IOException, DataException { this.dbfFile.open(); + // necessary when editing the file + this.getDBFParameters().setEffectiveEncoding(this.dbfFile.getCharsetName()); } public void close() throws CloseException { @@ -930,4 +932,5 @@ public ResourceProvider getResource() { return dbfResource; } + } Index: org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/utils/DbaseFile.java =================================================================== --- org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/utils/DbaseFile.java (revisión: 42991) +++ org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/utils/DbaseFile.java (copia de 
trabajo) @@ -206,7 +206,7 @@ } public DbaseFile(File afile, Charset chars) { - this(afile, null, false); + this(afile, chars, false); } public DbaseFile(File afile, Charset chars, boolean allowDuplicatedFieldNames) { @@ -215,10 +215,39 @@ this.allowDuplicatedFieldNames = allowDuplicatedFieldNames; } + /** + * @deprecated Use {@link #getCodePageInt()} instead + */ + @Deprecated public byte getCodePage() { + return (byte) myHeader.getLanguageID(); + } + + public int getCodePageInt() { return myHeader.getLanguageID(); } + + /** + * Returns the charset used to read/write this dbf. Maybe different + * from the declared in the file if we have forced a different one + * + * @return + */ + public String getCharsetName() { + return chars.name(); + //return myHeader.getCharsetName(); + } + /** + * Returns the charset declared on the dbf file (or the + * default one if none is declared) + * + * @return + */ + public String getOriginalCharsetName() { + return myHeader.getOriginalCharset(); + } + // Retrieve number of records in the DbaseFile public int getRecordCount() { return myHeader.getNumRecords(); @@ -295,13 +324,7 @@ cachedRecord.position(fieldOffset); cachedRecord.get(data); - try { - return new String(data, chars.name()); - } catch (java.io.UnsupportedEncodingException e) { - throw new UnsupportedEncodingException( - e); - } - + return new String(data, chars); } public void setFieldValue(int rowIndex, int fieldId, Object obj) throws UnsupportedEncodingException, WriteException { @@ -313,7 +336,7 @@ + myHeader.getHeaderLength() + 1; ByteBuffer aux = ByteBuffer.wrap(data); - aux.put(str.getBytes(chars.name())); + aux.put(str.getBytes(chars)); // raf.seek(recordOffset + fieldOffset); // raf.writeBytes(str); aux.flip(); @@ -414,16 +437,7 @@ */ public void open() throws FileNotFoundException, UnsupportedVersionException, IOException { - /* - * 01h DOS USA code page 437 02h DOS Multilingual code page 850 03h - * Windows ANSI code page 1252 04h Standard Macintosh 64h EE 
MS-DOS code - * page 852 65h Nordic MS-DOS code page 865 66h Russian MS-DOS code page - * 866 67h Icelandic MS-DOS 68h Kamenicky (Czech) MS-DOS 69h Mazovia - * (Polish) MS-DOS 6Ah Greek MS-DOS (437G) 6Bh Turkish MS-DOS 96h - * Russian Macintosh 97h Eastern European Macintosh 98h Greek Macintosh - * C8h Windows EE code page 1250 C9h Russian Windows CAh Turkish Windows - * CBh Greek Windows - */ + if (!file.exists()) { throw new FileNotFoundException(file); } @@ -452,7 +466,18 @@ } else { myHeader.readHeader(buffer, chars.name(), allowDuplicatedFieldNames); } - charsOriginal = Charset.forName(myHeader.mappingEncoding(myHeader.getCharsetName())); + if (myHeader.getLanguageID()==0x00) { + // read from the .cpg file if ldid is 0x00 + DbaseCodepage cpReader = new DbaseCodepage(file); + String charsetName = cpReader.read(); + if (charsetName == null) { + charsetName = "ISO-8859-1"; // for compatibility with old gvSIG files + } + charsOriginal = Charset.forName(myHeader.mappingEncoding(charsetName)); + } + else { + charsOriginal = Charset.forName(myHeader.mappingEncoding(myHeader.getOriginalCharset())); + } if (chars == null) { chars = charsOriginal; } Index: org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/utils/DbaseFileHeader.java =================================================================== --- org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/utils/DbaseFileHeader.java (revisión: 42991) +++ org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.dbf/src/main/java/org/gvsig/fmap/dal/store/dbf/utils/DbaseFileHeader.java (copia de trabajo) @@ -85,10 +85,22 @@ // collection of header records. 
private DbaseFieldDescriptor[] myFieldDescriptions; - private byte myLanguageID; + private int myLanguageID = 0x00; + /** + * Only considered when myLanguageID = 0x00; + */ + private String charset = null; private List encodingSupportedByString = null; + private int origLanguageID = 0x00; + + /** + * Headers must always be encoded using ASCII/ISO-8859-1, regardless the + * encoding of the records + */ + private static final Charset headerCharset = Charset.forName("ISO-8859-1"); + /** * DbaseFileHreader constructor comment. */ @@ -431,18 +443,20 @@ in.order(ByteOrder.BIG_ENDIAN); - // skip the reserved bytes in the header. - // in.position(in.position() + 20); - - // Leemos el byte de language + // read the language bit (LDID) in.position(29); - myLanguageID = in.get(); - if (charsName == null) { + origLanguageID = byteAsUnsigned(in.get()); + if (charsName != null) { + // ignore the language bit, use the provided charset name + myLanguageID = DbaseCodepage.getLdid(charsName); + this.charset = charsName; + } + else { + // use the read the language bit + myLanguageID = origLanguageID; charsName = getCharsetName(); - charsName = mappingEncoding(charsName); - } + } - // Posicionamos para empezar a leer los campos. in.position(32); @@ -455,6 +469,7 @@ List fieldNames = new ArrayList(); + // FIXME: should field names be always read using ISO8859-1?? for (int i = 0; i < myNumFields; i++) { myFieldDescriptions[i] = new DbaseFieldDescriptor(); @@ -462,12 +477,7 @@ byte[] buffer = new byte[11]; in.get(buffer); String fieldName; - if (charsName != null) { - fieldName = new String(buffer, - charsName); - } else { - fieldName = new String(buffer); - } + fieldName = new String(buffer, headerCharset); if(allowDuplicatedFieldNames){ fieldName = getUniqueFieldName(fieldName, fieldNames); @@ -520,61 +530,27 @@ public void setNumRecords(int inNumRecords) { myNumRecords = inNumRecords; } - - /* - * Write the header data to the DBF file. - * - * @param out DOCUMENT ME! 
- * - * @throws Exception DOCUMENT ME! - * - public void writeHeader(LEDataOutputStream out) throws Exception { - // write the output file type. - out.writeByte(myFileType); - // write the date stuff - Calendar c = Calendar.getInstance(); - c.setTime(new Date()); - out.writeByte(c.get(Calendar.YEAR) - 1900); - out.writeByte(c.get(Calendar.MONTH) + 1); - out.writeByte(c.get(Calendar.DAY_OF_MONTH)); - // write the number of records in the datafile. - out.writeInt(myNumRecords); - // write the length of the header structure. - out.writeShort(myHeaderLength); - // write the length of a record - out.writeShort(myRecordLength); - // write the reserved bytes in the header - for (int i = 0; i < 20; i++) - out.writeByte(0); - // write all of the header records - int tempOffset = 0; - for (int i = 0; i < myFieldDescriptions.length; i++) { - // write the field name - for (int j = 0; j < 11; j++) { - if (myFieldDescriptions[i].myFieldName.length() > j) { - out.writeByte((int) myFieldDescriptions[i].myFieldName.charAt( - j)); - } else { - out.writeByte(0); - } - } - // write the field type - out.writeByte(myFieldDescriptions[i].myFieldType); - // write the field data address, offset from the start of the record. - out.writeInt(tempOffset); - tempOffset += myFieldDescriptions[i].myFieldLength; - // write the length of the field. - out.writeByte(myFieldDescriptions[i].myFieldLength); - // write the decimal count. - out.writeByte(myFieldDescriptions[i].myDecimalCount); - // write the reserved bytes. - for (int j = 0; j < 14; j++) - out.writeByte(0); - } - // write the end of the field definitions marker - out.writeByte(0x0D); - } + + /** + * Returns the value of the unsigned byte as a short + * Bytes are always signed in Java, so if we are reading a C unsigned byte + * with value > 128, it will appear as a negative value. + * + * In this case, we need to get the original unsigned value and return it as + * short or int, as byte will never correctly store the value in Java. 
+ * + * @return */ + private int byteAsUnsigned(byte b) { + int i; + if (b<0) { + i = b & 0xFF; + } + else { + i = b; + } + return i; + } /** * Class for holding the information assicated with a record. @@ -598,7 +574,38 @@ int myDecimalCount; } - public byte getLanguageID() { + /** + * Gets the Language driver IDs (code page) defined on the file header (or guessed + * from the provided charset) + * + * Some examples: + * 01h DOS USA code page 437 + 02h DOS Multilingual code page 850 + 03h Windows ANSI code page 1252 + 04h Standard Macintosh + 64h EE MS-DOS code page 852 + 65h Nordic MS-DOS code page 865 + 66h Russian MS-DOS code page 866 + 67h Icelandic MS-DOS + 68h Kamenicky (Czech) MS-DOS + 69h Mazovia (Polish) MS-DOS + 6Ah Greek MS-DOS (437G) + 6Bh Turkish MS-DOS + 96h Russian Macintosh + 97h Eastern European Macintosh + 98h Greek Macintosh + C8h Windows EE code page 1250 + C9h Russian Windows + CAh Turkish Windows + CBh Greek Windows + + See the java equivalences in {@link DbaseCodepage#dbfLdid} & {@link DbaseCodepage#ldidJava} objects. + + See some others here: https://github.com/infused/dbf/blob/master/docs/supported_encodings.csv + * @return + */ + public int getLanguageID() { + return myLanguageID; } @@ -606,9 +613,15 @@ public static DbaseFileHeader createDbaseHeader(FeatureType featureType) throws AttributeFeatureTypeNotSuportedException { + return createDbaseHeader(featureType, null); + } + + public static DbaseFileHeader createDbaseHeader(FeatureType featureType, String charsetName) + throws AttributeFeatureTypeNotSuportedException { DbaseFileHeader header = new DbaseFileHeader(); Iterator iterator=featureType.iterator(); - // TODO header.myLanguageID = langId; + header.myLanguageID = DbaseCodepage.getLdid(charsetName); + header.charset = charsetName; while (iterator.hasNext()) { FeatureAttributeDescriptor descriptor = (FeatureAttributeDescriptor) iterator.next(); @@ -643,6 +656,7 @@ } return header; } + /** * Write the header data to the DBF file. 
* @@ -685,9 +699,14 @@ // write the length of a record buffer.putShort((short) myRecordLength); - // // write the reserved bytes in the header - // for (int i=0; i<20; i++) out.writeByteLE(0); - buffer.position(buffer.position() + 20); + // write the reserved bytes in the header + buffer.position(buffer.position() + 17); + + // write the language id + buffer.put((byte)getLanguageID()); + + // write the reserved bytes in the header + buffer.position(buffer.position() + 2); // write all of the header records int tempOffset = 0; @@ -734,59 +753,30 @@ } } - /** - * 01h DOS USA code page 437 - 02h DOS Multilingual code page 850 - 03h Windows ANSI code page 1252 - 04h Standard Macintosh - 64h EE MS-DOS code page 852 - 65h Nordic MS-DOS code page 865 - 66h Russian MS-DOS code page 866 - 67h Icelandic MS-DOS - 68h Kamenicky (Czech) MS-DOS - 69h Mazovia (Polish) MS-DOS - 6Ah Greek MS-DOS (437G) - 6Bh Turkish MS-DOS - 96h Russian Macintosh - 97h Eastern European Macintosh - 98h Greek Macintosh - C8h Windows EE code page 1250 - C9h Russian Windows - CAh Turkish Windows - CBh Greek Windows - * @return - */ public String getCharsetName() { - switch (getLanguageID()) { - case 0x01: - return "US-ASCII"; - case 0x02: - return "ISO-8859-1"; - case 0x03: - return "windows-1252"; - case 0x04: - return "mac"; - case 0x64: - return "ISO-8859-1"; - case 0x65: - return "ISO-8859-1"; - case 0x66: - return "ISO-8859-1"; - case 0x67: - return "ISO-8859-1"; - case 0x68: - return "greek"; - case 0x69: - return "ISO-8859-1"; - case 0x6A: - return "greek"; - case 0x6B: - return "ISO-8859-1"; - - default: - return "ISO-8859-1"; + return getCharsetName(getLanguageID()); + } + + public String getCharsetName(int ldid) { + if (ldid!=0) { + // prefer the charset defined by the ldid + for (int i=0; ilimit) { + // too long, truncating + /* + * The block code bellow is equivalent to this simple code + * fragment: + + if (rightPadding) { + in = in.substring(0, in.length()-1); + encodedString = 
in.getBytes(charset); + } + else { + in.substring(1, in.length()); + encodedString = in.getBytes(charset); + } + + However, the implemented algorithm has a much better performance + for the average and worst cases (when the input string has a lot + of multibyte characters), while keeping a good performance + for the best case (when all the characters in the input string + can be represented as single bytes using the selected charset). + + The general strategy is to compute the deviation from the + required maximum number of bytes (limit) and the actual number + of bytes of the encoded String. + + Then, we use this deviation to estimate the amount of characters + to truncate, based on the average factor of bytes per char in the + input string. + + We truncate the string using this approach until the deviation + gets stable. + + Finally, as we should be close enough to the right truncation position, + we increment/decrement the truncated string by only 1 character, to + ensure we truncate in the exact position. 
+ */ + String str = in; + int estimatedDiff, deviation; + int deviationPrev; + double ratio; + byte[] encodedChar; + int truncatePos = 0; + deviation = encodedString.length - limit; + deviationPrev = deviation - 1; + while(Math.abs(deviation)>Math.abs(deviationPrev) && str.length()>0) { + ratio = ((double)encodedString.length) / ((double)str.length()); + // apply the estimated diff, ensuring it is at least >= 1.0 in absolute value + estimatedDiff = Math.max((int)(((double)deviation)/ratio), (int)(Math.signum(deviation)*1)); + // too long, truncating + if (rightPadding) { + truncatePos = Math.max(str.length()-estimatedDiff, 0); + str = in.substring(0, truncatePos); + } + else { + truncatePos = Math.max(truncatePos + estimatedDiff, 0); + str = in.substring(truncatePos); + } + encodedString = str.getBytes(charset); + deviationPrev = deviation; + deviation = encodedString.length - limit; + } + // now we are close enough, get the exact position for truncating + while (encodedString.length>limit) { + // too long, truncating + // System.out.println("truncating"); + if (rightPadding) { + str = in.substring(0, str.length()-1); + } + else { + truncatePos = truncatePos + 1; + str = in.substring(truncatePos); + } + encodedString = str.getBytes(charset); + } + while (encodedString.lengthlimit) { + // one more char would overflow the limit + break; + } + // too short, extending + if (rightPadding) { + str = in.substring(0, str.length()+1); + } + else { + truncatePos = truncatePos - 1; + str = in.substring(truncatePos); + } + encodedString = str.getBytes(charset); + } + } + if (rightPadding) { + buffer.put(encodedString); + } + if (encodedString.lengthlimit) { + // Might happen for instance if charset is UTF16 and the + // limit of characters in the field is an odd number + throw new UnsupportedEncodingException(new Exception("Impossible to encode this DBF using the selected charset")); + } + } + if (!rightPadding) { + buffer.put(encodedString); + } + } + 
catch(BufferOverflowException exc) { + // Might happen for instance if charset is UTF16 and the + // limit of characters in the field is an odd number + throw new UnsupportedEncodingException(exc); + } + } + + /** + * Returns a safely padded (and potentially truncated) string + * + * This may truncate some record, but it is required to ensure + * that the field limit is not overflowed when using + * variable-length charsets such as UTF-8. + * + * This implementation is not used but it is kept here for reference. + * It is fully equivalent to the {@link #safeEncode(String, int, boolean)} + * method and easier to understand, but this implementation is much + * slower for any multibyte charset (such as UTF-8). + * + * @throws UnsupportedEncodingException + */ + private void safeEncodeSlow(String in, int limit, boolean rightPadding) throws UnsupportedEncodingException { + try { + byte[] encodedString = in.getBytes(this.charset); + while (encodedString.length>limit) { + // too long, truncating + if (rightPadding) { + in = in.substring(0, in.length()-1); + encodedString = in.getBytes(charset); + } + else { + in.substring(1, in.length()); + encodedString = in.getBytes(charset); + } + } + if (rightPadding) { + buffer.put(encodedString); + } + if (encodedString.lengthlimit) { + throw new UnsupportedEncodingException(new Exception("Impossible to encode this DBF using the selected charset")); + } + } + if (!rightPadding) { + buffer.put(encodedString); + } + } + catch(BufferOverflowException exc) { + // Might happen for instance if charset is UTF16 and the + // limit of characters in the field is an odd number + throw new UnsupportedEncodingException(exc); + } + } + + + /** * Release resources associated with this writer. 
Highly recommended * * @throws CloseException @@ -485,21 +668,10 @@ public String getFieldString(int size, int decimalPlaces, double n) { buffer.delete(0, buffer.length()); - // if (n != null) { numFormat.setMaximumFractionDigits(decimalPlaces); numFormat.setMinimumFractionDigits(decimalPlaces); numFormat.format(n, buffer, new FieldPosition( NumberFormat.INTEGER_FIELD)); - // } - - int diff = size - buffer.length(); - if (diff >= 0) { - while (diff-- > 0) { - buffer.insert(0, ' '); - } - } else { - buffer.setLength(size); - } return buffer.toString(); } } @@ -506,7 +678,8 @@ public void setCharset(Charset charset) { this.charset = charset; - + blank = charset.encode(" "); + blankSize = blank.limit(); } } Index: org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.shp/src/main/java/org/gvsig/fmap/dal/store/shp/SHPStoreProvider.java =================================================================== --- org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.shp/src/main/java/org/gvsig/fmap/dal/store/shp/SHPStoreProvider.java (revisión: 42991) +++ org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.shp/src/main/java/org/gvsig/fmap/dal/store/shp/SHPStoreProvider.java (copia de trabajo) @@ -502,6 +502,13 @@ + "' file to replace with the new shx.\nThe new shx is in temporary file '" + str_base + "'"); } + if (shpParams.getCPGFile().exists() && !shpParams.getCPGFile().delete()) { + logger.debug("Can't delete cpg file '" + shpParams.getCPGFile() + "'."); + throw new IOException("Can't delete cpg '" + + FilenameUtils.getBaseName(shpParams.getCPGFileName()) + + "' file to replace with the new cpg.\nThe new cpg is in temporary file '" + str_base + + "'"); + } File prjFile = SHP.getPrjFile(shpParams.getSHPFile()); if (prjFile.exists()) { @@ -516,6 +523,7 @@ FileUtils.moveFile(tmpParams.getDBFFile(), shpParams.getDBFFile()); 
FileUtils.moveFile(tmpParams.getSHPFile(), shpParams.getSHPFile()); FileUtils.moveFile(tmpParams.getSHXFile(), shpParams.getSHXFile()); + FileUtils.moveFile(tmpParams.getCPGFile(), shpParams.getCPGFile()); savePrjFile(shpParams.getFile(), tmpParams.getCRS()); Index: org.gvsig.desktop.library/org.gvsig.exportto/org.gvsig.exportto.swing/org.gvsig.exportto.swing.prov/org.gvsig.exportto.swing.prov.shape/pom.xml =================================================================== --- org.gvsig.desktop.library/org.gvsig.exportto/org.gvsig.exportto.swing/org.gvsig.exportto.swing.prov/org.gvsig.exportto.swing.prov.shape/pom.xml (revisión: 42991) +++ org.gvsig.desktop.library/org.gvsig.exportto/org.gvsig.exportto.swing/org.gvsig.exportto.swing.prov/org.gvsig.exportto.swing.prov.shape/pom.xml (copia de trabajo) @@ -29,6 +29,11 @@ org.gvsig + org.gvsig.exportto.swing.prov.dbf + compile + + + org.gvsig org.gvsig.fmap.dal.api compile Index: org.gvsig.desktop.library/org.gvsig.exportto/org.gvsig.exportto.swing/org.gvsig.exportto.swing.prov/org.gvsig.exportto.swing.prov.shape/src/main/java/org/gvsig/exportto/swing/prov/shape/ExporttoShapeProvider.java =================================================================== --- org.gvsig.desktop.library/org.gvsig.exportto/org.gvsig.exportto.swing/org.gvsig.exportto.swing.prov/org.gvsig.exportto.swing.prov.shape/src/main/java/org/gvsig/exportto/swing/prov/shape/ExporttoShapeProvider.java (revisión: 42991) +++ org.gvsig.desktop.library/org.gvsig.exportto/org.gvsig.exportto.swing/org.gvsig.exportto.swing.prov/org.gvsig.exportto.swing.prov.shape/src/main/java/org/gvsig/exportto/swing/prov/shape/ExporttoShapeProvider.java (copia de trabajo) @@ -24,8 +24,9 @@ package org.gvsig.exportto.swing.prov.shape; import org.cresques.cts.IProjection; - import org.gvsig.exportto.ExporttoService; +import org.gvsig.exportto.swing.prov.dbf.ExporttoDBFService; +import org.gvsig.exportto.swing.prov.dbf.panel.ExporttoDBFPanel; import 
org.gvsig.exportto.swing.prov.file.AbstractExporttoFileProvider; import org.gvsig.exportto.swing.spi.ExporttoSwingProvider; import org.gvsig.fmap.dal.feature.FeatureStore; @@ -50,11 +51,11 @@ */ public ExporttoShapeProvider(ProviderServices providerServices, FeatureStore featureStore, IProjection projection) { - super(providerServices, featureStore, projection); + super(providerServices, featureStore, projection, new ExporttoDBFPanel()); } public ExporttoService createExporttoService() { return new ExporttoShapeService( - selectFileOptionPanel, featureStore, projection); + selectFileOptionPanel, featureStore, projection, ((ExporttoDBFPanel) selectFileOptionPanel).getEncoding()); } } Index: org.gvsig.desktop.library/org.gvsig.exportto/org.gvsig.exportto.swing/org.gvsig.exportto.swing.prov/org.gvsig.exportto.swing.prov.shape/src/main/java/org/gvsig/exportto/swing/prov/shape/ExporttoShapeService.java =================================================================== --- org.gvsig.desktop.library/org.gvsig.exportto/org.gvsig.exportto.swing/org.gvsig.exportto.swing.prov/org.gvsig.exportto.swing.prov.shape/src/main/java/org/gvsig/exportto/swing/prov/shape/ExporttoShapeService.java (revisión: 42991) +++ org.gvsig.desktop.library/org.gvsig.exportto/org.gvsig.exportto.swing/org.gvsig.exportto.swing.prov/org.gvsig.exportto.swing.prov.shape/src/main/java/org/gvsig/exportto/swing/prov/shape/ExporttoShapeService.java (copia de trabajo) @@ -35,9 +35,6 @@ import org.cresques.cts.ICoordTrans; import org.cresques.cts.IProjection; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import org.gvsig.exportto.ExporttoService; import org.gvsig.exportto.ExporttoServiceException; import org.gvsig.exportto.ExporttoServiceFinishAction; @@ -79,6 +76,8 @@ import org.gvsig.tools.dispose.DisposableIterator; import org.gvsig.tools.dispose.DisposeUtils; import org.gvsig.tools.task.AbstractMonitorableTask; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** @@ -102,6 +101,7 
@@ private NewFeatureStoreParameters newFeatureStoreParameters; private FilesystemServerExplorer filesystemServerExplorer; private SelectFileOptionPanel filePanel = null; + private String encoding = null; private static GeometryManager geoManager = GeometryLocator.getGeometryManager(); @@ -112,11 +112,21 @@ FeatureStore featureStore, IProjection projection) { + this(fPanel, featureStore, projection, null); + } + + public ExporttoShapeService( + SelectFileOptionPanel fPanel, + FeatureStore featureStore, + IProjection projection, + String encoding) { + super("Export to shape"); this.featureStore = featureStore; this.filePanel = fPanel; this.theShapeFile = fPanel.getSelectedFile(); this.projection = projection; + this.encoding = encoding; try { origNameToDbfName = getNames(featureStore.getDefaultFeatureType()); @@ -356,6 +366,7 @@ } newFeatureStoreParameters.setDynValue("CRS", projection); + newFeatureStoreParameters.setDynValue("encoding", encoding); geometryType = featureSet.getDefaultFeatureType().getDefaultGeometryAttribute()