Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / gml / TestXML.java @ 47638

History | View | Annotate | Download (14.6 KB)

1
package org.gvsig.fmap.dal.store.gml;
2

    
3
import java.io.File;
4
import java.io.FileInputStream;
5
import java.io.InputStreamReader;
6
import java.io.Reader;
7
import java.nio.charset.Charset;
8
import java.nio.charset.StandardCharsets;
9
import java.util.ArrayList;
10
import java.util.Collections;
11
import java.util.HashMap;
12
import java.util.HashSet;
13
import java.util.Iterator;
14
import java.util.LinkedHashMap;
15
import java.util.LinkedHashSet;
16
import java.util.List;
17
import java.util.Map;
18
import java.util.Set;
19
import javax.xml.parsers.SAXParser;
20
import javax.xml.parsers.SAXParserFactory;
21
import org.apache.commons.io.FilenameUtils;
22
import org.apache.commons.lang3.StringUtils;
23
import org.apache.commons.lang3.mutable.MutableLong;
24
import org.gvsig.tools.namestranslator.NamesTranslator;
25
import org.xml.sax.Attributes;
26
import org.xml.sax.InputSource;
27
import org.xml.sax.Locator;
28
import org.xml.sax.SAXException;
29
import org.xml.sax.helpers.DefaultHandler;
30

    
31
/**
 * Scratch utility that scans an XML file with SAX and prints a relational
 * view of its structure: every element that has child elements or attributes
 * is reported as a "table", and its direct children/attributes as "fields".
 *
 * <p>Paths are built from element local names joined with {@code /};
 * attributes are reported as {@code <element path>/#<attribute name>}.
 */
public class TestXML {

    // Sample inputs located on the original developer's machine.
    private static final String XMLFILE1 = "/home/jjdelcerro/datos/geodata/vector/sigpac/2018/Declaracion.xml";
    private static final String XMLFILE2 = "/home/jjdelcerro/datos/geodata/vector/ARENA2/quincenas-0/TV_03_2019_01_Q1/victimas.xml";
    private static final String XMLFILE_BIG = "/home/jjdelcerro/datos/geodata/vector/RSUPAC/2020/BDA_RSU_PAC20_1713052020_001.XML";

    /**
     * Prints the table/field layout deduced from an XML file.
     *
     * @param args optional; {@code args[0]} is the XML file to analyse. When
     *             absent, {@link #XMLFILE_BIG} is used.
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        String xmlfile = (args != null && args.length > 0) ? args[0] : XMLFILE_BIG;
        TestXML t = new TestXML();

        List<String> tags = new ArrayList<>(t.extractTags(xmlfile));
        Collections.sort(tags);

        Map<String, List<String>> tables = buildTables(tags);

        // TODO: table names could be shortened with
        // NamesTranslator.createTrimTranslator(45) instead of using the raw
        // last path segment.
        int n = 1;
        for (Map.Entry<String, List<String>> entry : tables.entrySet()) {
            String tableName = entry.getKey();
            System.out.println("TABLE " + lastSegment(tableName) + " (" + n + ", " + tableName + ")");
            for (String field : entry.getValue()) {
                System.out.println("    " + field);
            }
            n++;
        }
    }

    /**
     * Groups tag paths into tables.
     *
     * <p>A tag becomes a table when at least one other tag is its direct
     * child (exactly one more path segment). Each table gets one
     * {@code $ID_<segment>} key column per segment of its own path (the last
     * one marked PK, the ancestors FK), followed by its direct children.
     * Children that are themselves tables are listed as {@code $ID_<child> FK}.
     *
     * @param sortedTags tag paths, already sorted; the order decides the
     *                   iteration order of the returned map
     * @return table path to sorted field list, in the order of {@code sortedTags}
     */
    private static Map<String, List<String>> buildTables(List<String> sortedTags) {
        // Single pass: attach every tag to its direct parent. Splitting on the
        // last '/' (instead of a bare startsWith on the parent) keeps "A/BC"
        // from being mistaken for a child of "A/B".
        Map<String, Set<String>> childrenByParent = new HashMap<>();
        for (String tag : sortedTags) {
            int slash = tag.lastIndexOf('/');
            if (slash < 0) {
                continue; // root element: has no parent table
            }
            childrenByParent
                    .computeIfAbsent(tag.substring(0, slash), k -> new HashSet<>())
                    .add(tag.substring(slash + 1));
        }

        Map<String, List<String>> tables = new LinkedHashMap<>();
        for (String tableName : sortedTags) {
            Set<String> children = childrenByParent.get(tableName);
            if (children == null) {
                continue; // leaf tag: a field, not a table
            }
            List<String> fields = new ArrayList<>();
            String[] keys = tableName.split("/");
            for (int i = 0; i < keys.length; i++) {
                boolean own = (i == keys.length - 1);
                fields.add("$ID_" + keys[i] + (own ? ", PK" : ", FK"));
            }
            for (String child : children) {
                if (childrenByParent.containsKey(tableName + "/" + child)) {
                    // NOTE(review): unlike the key columns above there is no
                    // comma before FK; kept exactly as the original output.
                    fields.add("$ID_" + child + " FK");
                } else {
                    fields.add(child);
                }
            }
            Collections.sort(fields);
            tables.put(tableName, fields);
        }
        return tables;
    }

    /**
     * Returns the text after the last {@code /} of a path (the whole string
     * when there is none). Dots are kept, so an element called
     * {@code foo.bar} is not truncated to {@code foo}.
     */
    private static String lastSegment(String path) {
        return path.substring(path.lastIndexOf('/') + 1);
    }

    /**
     * Opens the file as a character stream, always decoding it as UTF-8.
     * The caller is responsible for closing the returned reader.
     *
     * @param xmlfile file to open
     * @return a reader over the file contents
     * @throws Exception if the file cannot be opened
     */
    private Reader openFileReader(File xmlfile) throws Exception {
        FileInputStream fis = new FileInputStream(xmlfile);
        // TODO: encoding auto-detection (Tika EncodingDetector) was sketched
        // here but never enabled; UTF-8 is assumed for every file.
        Charset encoding = StandardCharsets.UTF_8;
        return new InputStreamReader(fis, encoding);
    }

    /**
     * Runs a namespace-aware SAX parse of the file with the given handler and
     * closes the underlying reader afterwards, whether or not parsing fails.
     *
     * @param xmlfile path of the XML file
     * @param banner  line printed to standard output just before parsing
     *                starts, or {@code null} to print nothing
     * @param handler receives the SAX events
     * @throws Exception if the parser cannot be created or the file cannot be
     *                   read or parsed
     */
    private void parse(String xmlfile, String banner, DefaultHandler handler) throws Exception {
        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setNamespaceAware(true);
        SAXParser saxParser = spf.newSAXParser();
        try (Reader reader = openFileReader(new File(xmlfile))) {
            if (banner != null) {
                System.out.println(banner);
            }
            saxParser.parse(new InputSource(reader), handler);
        }
    }

    /**
     * SAX handler base that keeps the stack of open element local names so
     * subclasses can ask for the current {@code /}-separated path.
     */
    private static class PathTrackingHandler extends DefaultHandler {
        private final List<String> path = new ArrayList<>();

        /** Pushes an element and returns the path including it. */
        protected final String push(String localName) {
            path.add(localName);
            return current();
        }

        /** Returns the path of the innermost open element. */
        protected final String current() {
            return String.join("/", path);
        }

        /** Pops the innermost open element. */
        protected final void pop() {
            path.remove(path.size() - 1);
        }
    }

    /**
     * Collects every distinct element path and attribute path found in the
     * file, in order of first appearance.
     *
     * @param xmlfile path of the XML file
     * @return element paths ({@code A/B}) and attribute paths ({@code A/B/#attr})
     * @throws Exception if the file cannot be read or parsed
     */
    private Set<String> extractTags(String xmlfile) throws Exception {
        final Set<String> tags = new LinkedHashSet<>();

        parse(xmlfile, null, new PathTrackingHandler() {
            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                String elementPath = push(localName);
                tags.add(elementPath);
                for (int i = 0; i < attributes.getLength(); i++) {
                    tags.add(elementPath + "/#" + attributes.getLocalName(i));
                }
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                pop();
            }
        });

        return tags;
    }

    /**
     * Reads the file as a list of records.
     *
     * <p>Each element whose path equals {@code recordPath} (ignoring case)
     * produces one record holding, for every entry of {@code fieldPaths} and
     * in that order, the text of the last element with that path seen inside
     * the record, or {@code null} when there was none.
     *
     * <p>Example: {@code getRecords1(file, "DECLARACION/LINEA_DECLARACION",
     * Arrays.asList("DECLARACION/LINEA_DECLARACION/PROV",
     * "DECLARACION/LINEA_DECLARACION/ZONA"))}.
     *
     * @param xmlfile    path of the XML file
     * @param recordPath path of the element that delimits a record
     * @param fieldPaths full paths of the elements to extract from each record
     * @return one value list per record, in document order
     * @throws Exception if the file cannot be read or parsed
     */
    private List<List<String>> getRecords1(String xmlfile, final String recordPath, final List<String> fieldPaths) throws Exception {
        class ParseRecordsHandler extends PathTrackingHandler {
            final List<List<String>> records = new ArrayList<>();
            final Map<String, String> record = new HashMap<>();
            final StringBuilder value = new StringBuilder();

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                String elementPath = push(localName);
                // Every element except the record wrapper starts a fresh
                // text buffer.
                if (!elementPath.equalsIgnoreCase(recordPath)) {
                    this.value.setLength(0);
                }
            }

            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                value.append(ch, start, length);
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                String elementPath = current();

                if (elementPath.equalsIgnoreCase(recordPath)) {
                    List<String> values = new ArrayList<>();
                    for (String fieldPath : fieldPaths) {
                        values.add(record.get(fieldPath));
                    }
                    records.add(values);
                    record.clear();
                } else {
                    for (String fieldPath : fieldPaths) {
                        if (elementPath.equalsIgnoreCase(fieldPath)) {
                            record.put(fieldPath, this.value.toString());
                            // No break to handle repeated columns
                        }
                    }
                }

                pop();
            }
        }

        ParseRecordsHandler handler = new ParseRecordsHandler();
        parse(xmlfile, "Parse (getRecords): -------------------------", handler);
        return handler.records;
    }

    /**
     * Counts the elements whose path equals {@code recordPath}, ignoring case.
     *
     * @param xmlfile    path of the XML file
     * @param recordPath path of the element that delimits a record
     * @return number of matching elements
     * @throws Exception if the file cannot be read or parsed
     */
    private long getRecordCount(String xmlfile, final String recordPath) throws Exception {
        class CountRecordsHandler extends PathTrackingHandler {
            long numRecords = 0;

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                if (push(localName).equalsIgnoreCase(recordPath)) {
                    numRecords++;
                }
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                pop();
            }
        }

        CountRecordsHandler handler = new CountRecordsHandler();
        parse(xmlfile, "Parse (countRecords)", handler);
        return handler.numRecords;
    }

    // TODO: an index-building pass (createIndex) that would store the position
    // of every record element in a RandomAccessFileIndex was sketched here but
    // never finished.

    // Empty placeholder kept from the original file.
    private void test() {
    }

}