svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.library / org.gvsig.utils / src / main / java / org / gvsig / utils / xml / XMLEncodingUtils.java @ 40561
History | View | Annotate | Download (6.3 KB)
1 | 40561 | jjdelcerro | /**
|
---|---|---|---|
2 | * gvSIG. Desktop Geographic Information System.
|
||
3 | 40435 | jjdelcerro | *
|
4 | 40561 | jjdelcerro | * Copyright (C) 2007-2013 gvSIG Association.
|
5 | 40435 | jjdelcerro | *
|
6 | * This program is free software; you can redistribute it and/or
|
||
7 | * modify it under the terms of the GNU General Public License
|
||
8 | 40561 | jjdelcerro | * as published by the Free Software Foundation; either version 3
|
9 | 40435 | jjdelcerro | * of the License, or (at your option) any later version.
|
10 | *
|
||
11 | * This program is distributed in the hope that it will be useful,
|
||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
14 | * GNU General Public License for more details.
|
||
15 | *
|
||
16 | * You should have received a copy of the GNU General Public License
|
||
17 | * along with this program; if not, write to the Free Software
|
||
18 | 40561 | jjdelcerro | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
19 | * MA 02110-1301, USA.
|
||
20 | 40435 | jjdelcerro | *
|
21 | 40561 | jjdelcerro | * For any additional information, do not hesitate to contact us
|
22 | * at info AT gvsig.com, or visit our website www.gvsig.com.
|
||
23 | 40435 | jjdelcerro | */
|
24 | package org.gvsig.utils.xml; |
||
25 | |||
26 | import java.io.BufferedInputStream; |
||
27 | import java.io.File; |
||
28 | import java.io.FileInputStream; |
||
29 | import java.io.FileNotFoundException; |
||
30 | import java.io.IOException; |
||
31 | import java.io.InputStream; |
||
32 | import java.io.InputStreamReader; |
||
33 | import java.io.UnsupportedEncodingException; |
||
34 | |||
35 | /**
|
||
36 | * A set of methods to detect XML encoding. The class is able to autodetect
|
||
37 | * certain encodings, and it reads the XML header for the rest of encodings.
|
||
38 | *
|
||
39 | * @author César Martínez Izquierdo <cesar.martinez@iver.es>
|
||
40 | *
|
||
41 | */
|
||
public class XMLEncodingUtils {
    /**
     * Maximum number of bytes inspected while detecting the encoding. An XML
     * declaration that is not closed by '&gt;' within this many bytes is
     * treated as undetectable instead of overflowing the header buffer.
     */
    private static final int MAX_HEADER_BYTES = 1024;

    /** The stream the XML document is read from. */
    InputStream _is;

    /**
     * Creates a new XMLEncodingUtils object.
     *
     * @param is An InputStream connected to the XML file to process.
     * @throws IllegalArgumentException if <code>is</code> is null.
     */
    public XMLEncodingUtils(InputStream is) {
        if (is == null) {
            throw new IllegalArgumentException("is must not be null");
        }
        _is = is;
    }

    /**
     * Gets the encoding of the XML file.
     *
     * The following encodings can be detected from the first four bytes:
     * UTF-32BE, UTF-32LE, UTF-16BE, UTF-16LE, UTF-8. For documents starting
     * with the ASCII bytes "&lt;?xm" the value of the <code>encoding</code>
     * attribute of the XML declaration is returned.
     *
     * Detection reads directly from the underlying stream, so the bytes
     * inspected here are consumed from it.
     *
     * @return Returns the encoding of the XML file, or null if the
     * encoding couldn't be correctly detected or read from the XML
     * header (this includes declarations without an encoding attribute,
     * malformed or over-long declarations and I/O errors).
     */
    public String getEncoding() {
        return readHeader().encoding;
    }

    /**
     * Gets an InputStreamReader for the provided XML file.
     * The reader uses the right encoding, as specified in
     * the XML header (or autodetected).
     *
     * The bytes consumed while detecting the encoding are replayed in front
     * of the remaining stream (without the byte order mark, if there was
     * one), so the reader delivers the document from its first character.
     *
     * @return A reader which uses the right encoding, or null
     * if the encoding couldn't be correctly detected or read
     * from the XML header, or if it is not supported.
     */
    public InputStreamReader getReader() {
        Header header = readHeader();
        if (header.encoding == null) {
            return null;
        }

        InputStream source = _is;
        int replayLength = header.length - header.bomLength;
        if (replayLength > 0) {
            // Fully-qualified names keep the file's import list untouched.
            source = new java.io.SequenceInputStream(
                    new java.io.ByteArrayInputStream(
                            header.bytes, header.bomLength, replayLength),
                    _is);
        }

        try {
            return new InputStreamReader(source, header.encoding);
        } catch (UnsupportedEncodingException e) {
            return null;
        }
    }

    /**
     * Gets an InputStreamReader for the provided XML file.
     * The reader uses the right encoding, as specified in
     * the XML header (or autodetected).
     *
     * @param is An InputStream connected to the XML file to process
     * @return A reader for the provided XML file, or null if the encoding
     * couldn't be determined.
     * @see #getReader()
     */
    public static InputStreamReader getReader(InputStream is) {
        XMLEncodingUtils util = new XMLEncodingUtils(is);
        return util.getReader();
    }

    /**
     * Gets the character encoding of the XML file.
     *
     * @param is An InputStream connected to the XML file to process
     * @see #getEncoding()
     * @return The encoding of the file, or null if it couldn't be determined.
     */
    public static String getEncoding(InputStream is) {
        XMLEncodingUtils util = new XMLEncodingUtils(is);
        return util.getEncoding();
    }

    /**
     * Gets an InputStreamReader for the provided XML file.
     * The reader uses the right encoding, as specified in
     * the XML header (or autodetected). Closing the returned reader
     * closes the file; if no reader can be created the file is closed
     * before returning.
     *
     * @param file The XML file to process
     * @return A reader for the provided XML file, or null if the encoding
     * couldn't be determined.
     * @throws FileNotFoundException if the file cannot be opened.
     * @see #getReader()
     */
    public static InputStreamReader getReader(File file) throws FileNotFoundException {
        BufferedInputStream bs = new BufferedInputStream(new FileInputStream(file));
        InputStreamReader reader = null;
        try {
            reader = new XMLEncodingUtils(bs).getReader();
            return reader;
        } finally {
            // Nobody else holds the stream when no reader was handed out.
            if (reader == null) {
                closeQuietly(bs);
            }
        }
    }

    /**
     * Gets the character encoding of the XML file. The file is opened only
     * for the duration of the call.
     *
     * @param file The XML file to process
     * @see #getEncoding()
     * @return The encoding of the file, or null if it couldn't be determined.
     * @throws FileNotFoundException if the file cannot be opened.
     */
    public static String getEncoding(File file) throws FileNotFoundException {
        BufferedInputStream bs = new BufferedInputStream(new FileInputStream(file));
        try {
            return new XMLEncodingUtils(bs).getEncoding();
        } finally {
            closeQuietly(bs);
        }
    }

    /**
     * Reads the start of the stream and determines the encoding from the
     * first four bytes or, for "&lt;?xm", from the XML declaration.
     *
     * @return the bytes consumed from the stream together with the detected
     * encoding (null when unknown) and the length of the byte order mark.
     */
    private Header readHeader() {
        Header header = new Header();
        try {
            // The first four bytes, packed big-endian, form the signature.
            int chk = 0;
            while (header.length < 4) {
                int b = _is.read();
                if (b == -1) {
                    return header;
                }
                chk = (chk << 8) | b;
                header.bytes[header.length++] = (byte) b;
            }

            switch (chk) {
                case 0x0000FEFF: // UTF-32 big-endian byte order mark
                    header.encoding = "UTF-32BE";
                    header.bomLength = 4;
                    break;

                case 0xFFFE0000: // UTF-32 little-endian byte order mark
                    header.encoding = "UTF-32LE";
                    header.bomLength = 4;
                    break;

                case 0x0000003C: // '<' as one big-endian 32-bit unit
                    header.encoding = "UTF-32BE";
                    break;

                case 0x3C000000: // '<' as one little-endian 32-bit unit
                    header.encoding = "UTF-32LE";
                    break;

                case 0x003C003F: // "<?" as big-endian 16-bit units
                    header.encoding = "UTF-16BE";
                    break;

                case 0x3C003F00: // "<?" as little-endian 16-bit units
                    header.encoding = "UTF-16LE";
                    break;

                case 0x3C3F786D: // "<?xm" in an ASCII-compatible encoding
                    readDeclaredEncoding(header);
                    break;

                default:
                    if ((chk & 0xFFFF0000) == 0xFEFF0000) {
                        header.encoding = "UTF-16BE";
                        header.bomLength = 2;
                    } else if ((chk & 0xFFFF0000) == 0xFFFE0000) {
                        header.encoding = "UTF-16LE";
                        header.bomLength = 2;
                    } else if ((chk & 0xFFFFFF00) == 0xEFBBBF00) {
                        header.encoding = "UTF-8";
                        header.bomLength = 3;
                    }
                    break;
            }
        } catch (IOException ex) {
            // An unreadable stream is reported as "encoding unknown".
            header.encoding = null;
        }
        return header;
    }

    /**
     * Reads up to the first '&gt;' and stores the value of the encoding
     * attribute in the header. The encoding stays null when the stream
     * ends, the buffer limit is reached or the attribute is missing or
     * malformed.
     */
    private void readDeclaredEncoding(Header header) throws IOException {
        while (header.length < header.bytes.length) {
            int b = _is.read();
            if (b == -1) {
                return;
            }
            header.bytes[header.length++] = (byte) b;
            if (b == '>') {
                // ISO-8859-1 maps every byte to the char with the same value.
                String declaration =
                        new String(header.bytes, 0, header.length, "ISO-8859-1");
                header.encoding = parseEncodingAttribute(declaration);
                return;
            }
        }
    }

    /**
     * Extracts the quoted value following the first occurrence of
     * "encoding" in the declaration.
     *
     * @return the attribute value, or null if there is no such attribute
     * or its value is not enclosed in matching quotes.
     */
    private static String parseEncodingAttribute(String declaration) {
        int open = declaration.indexOf("encoding");
        if (open == -1) {
            return null;
        }
        while (open < declaration.length()
                && declaration.charAt(open) != '"'
                && declaration.charAt(open) != '\'') {
            open++;
        }
        if (open >= declaration.length()) {
            return null;
        }
        char quote = declaration.charAt(open);
        int close = declaration.indexOf(quote, open + 1);
        if (close == -1) {
            return null;
        }
        return declaration.substring(open + 1, close);
    }

    /** Closes the stream, ignoring failures. */
    private static void closeQuietly(InputStream in) {
        try {
            in.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing a read-only stream fails.
        }
    }

    /** Bytes consumed during detection and what was learned from them. */
    private static final class Header {
        /** Raw bytes read from the stream; only the first length are valid. */
        final byte[] bytes = new byte[MAX_HEADER_BYTES];
        /** Number of valid bytes in bytes. */
        int length;
        /** Number of leading bytes that form a byte order mark. */
        int bomLength;
        /** Detected encoding name, or null when unknown. */
        String encoding;
    }
}