刚开始使用在线XML格式化网站的时候就想写一个一样功能的java程序出来。
最初的设想是:使用XML解析库(如DOM)解析XML,再用Transformer把解析结果序列化输出。
代码如下:
package com.example;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import org.xml.sax.InputSource;
/**
 * Compacts pretty-printed XML by parsing it into a DOM tree, dropping the whitespace-only text
 * nodes and serializing the tree again without indentation.
 *
 * <p>Because the document goes through a parse/serialize round trip, the output is equivalent
 * XML but not necessarily byte-identical to a hand-compacted document: the serializer chooses
 * how empty elements are written (the surrounding article reports that {@code <x></x>} comes
 * back as {@code <x/>}).
 */
public class XMLCompress {

    /** Declaration prepended to every compacted document. */
    private static final String XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"utf-8\"?>";

    /**
     * Demo entry point: compacts a sample document and compares its UTF-8 byte length with a
     * hand-compacted reference.
     */
    public static void main(String[] args) throws Exception {
        String formattedXml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
                "<Root>\n" +
                " <Head>\n" +
                " <enterid>aaaaaa</enterid>\n" +
                " <txdate>20230710</txdate>\n" +
                " <seqno>20230711105728361668</seqno>\n" +
                " <inflag>BM</inflag>\n" +
                " <termno>1</termno>\n" +
                " <signtxt></signtxt>\n" +
                " </Head>\n" +
                " <Body>\n" +
                " <acctname1>测试华</acctname1>\n" +
                " <acctno1>6200000000000003642</acctno1>\n" +
                " <curry>01</curry>\n" +
                " <txamt>1.00</txamt>\n" +
                " <busitype>2</busitype>\n" +
                " <remark>备注:(主体和转入帐号、户名不能为空)</remark>\n" +
                " <accttype1>bm</accttype1>\n" +
                " <accttype2>hzz</accttype2>\n" +
                " <acctname2>边民互助组一</acctname2>\n" +
                " <acctno2>172612010105613788</acctno2>\n" +
                " </Body>\n" +
                "</Root>";

        String compactXml = compactXml(formattedXml);
        int compactLength = compactXml.getBytes(StandardCharsets.UTF_8).length;
        System.out.println("解析:\n" + compactXml);
        System.out.println("位数:" + compactLength);
        System.out.println("---------------------------------------------------------------------------------------------------------------------------------------------");

        // Hand-compacted reference document used for the length comparison.
        String test="<?xml version=\"1.0\" encoding=\"utf-8\"?><Root><Head><enterid>aaaaaa</enterid><txdate>20230710</txdate><seqno>20230711105728361668</seqno><inflag>BM</inflag><termno>1</termno><signtxt></signtxt></Head><Body><acctname1>测试华</acctname1><acctno1>6200000000000003642</acctno1><curry>01</curry><txamt>1.00</txamt><busitype>2</busitype><remark>备注:(主体和转入帐号、户名不能为空)</remark><accttype1>bm</accttype1><accttype2>hzz</accttype2><acctname2>边民互助组一</acctname2><acctno2>172612010105613788</acctno2></Body></Root>";
        int referenceLength = test.getBytes(StandardCharsets.UTF_8).length;
        System.out.println("真压缩:\n"+test);
        System.out.println(referenceLength);
        System.out.println("比较:"+(referenceLength == compactLength));
    }

    /**
     * Parses {@code xmlStr}, removes every whitespace-only text node below the root element and
     * returns the document serialized on a single line, prefixed with a fixed XML declaration.
     *
     * @param xmlStr the XML document to compact
     * @return the compacted document, starting with
     *     {@code <?xml version="1.0" encoding="utf-8"?>}
     * @throws Exception if the parser cannot be configured, the input is not well-formed XML,
     *     or serialization fails
     */
    public static String compactXml(String xmlStr) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // XXE hardening: never resolve external entities or fetch an external DTD while parsing.
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(new InputSource(new StringReader(xmlStr)));
        removeWhitespaceNodes(doc.getDocumentElement());

        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        transformer.setOutputProperty(OutputKeys.INDENT, "no");

        StringWriter writer = new StringWriter();
        transformer.transform(new DOMSource(doc), new StreamResult(writer));

        // The declaration is added by hand so that it does not carry a standalone attribute.
        return XML_DECLARATION + writer;
    }

    /**
     * Recursively removes all text nodes that consist solely of whitespace from {@code node}
     * and its descendant elements.
     *
     * <p>A text node holding exactly one space is removed like any other whitespace-only node;
     * otherwise tags separated by a single space would not be compacted.
     *
     * @param node the node whose subtree is cleaned in place
     */
    private static void removeWhitespaceNodes(Node node) {
        NodeList children = node.getChildNodes();
        // Iterate backwards: the NodeList is live, so removing a child shifts the later indexes.
        for (int i = children.getLength() - 1; i >= 0; i--) {
            Node child = children.item(i);
            short type = child.getNodeType();
            if (type == Node.TEXT_NODE) {
                if (child.getNodeValue().trim().isEmpty()) {
                    node.removeChild(child);
                }
            } else if (type == Node.ELEMENT_NODE) {
                removeWhitespaceNodes(child);
            }
        }
    }
}
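但实际运行后,与在线XML格式化网站的结果对比,发现有问题:使用XML解析库(如DOM)解析XML并序列化之后,`<signtxt></signtxt>`会被压缩成`<signtxt/>`,导致原XML内容的长度(字节数)发生改变。
也就是说,程序把`<signtxt></signtxt>`压缩成了`<signtxt/>`。这是因为我们使用了Transformer来序列化XML,而Transformer默认会将空元素转换为自闭合标签。
查阅后发现,根据XML语法,`<signtxt></signtxt>`和`<signtxt/>`是等价的,它们都表示一个空的signtxt元素。所以,当使用XML解析库解析并序列化XML时,`<signtxt></signtxt>`会自动转换为`<signtxt/>`。这是正常的,并符合XML规范。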
上述问题折腾了很久也没能解决,因为这种转换是XML规范允许的行为,并不是解析库的错误。所以就采用了下面的其他方法。
方法二:使用replaceAll直接替换空格和换行。
后面冷静下来才发现,用 str.replaceAll("\n", "") 和 replaceAll(" ", "") 直接替换就好了,再加上只从XML声明的">"之后开始替换空格,一共就三行代码……(注意:这种直接替换会把标签属性之间以及文本内容中的空格也一并删掉,只适用于标签不带属性、内容不含空格的报文。)
原来根本不需要像前面那样用XML解析库(如DOM)解析XML,搞得这么复杂。
详细代码如下:
package com.example;
import java.nio.charset.StandardCharsets;
/**
 * Compacts pretty-printed XML with plain string processing instead of an XML parser, so the
 * markup itself (including empty elements written as {@code <tag></tag>}) is left exactly as
 * it appears in the input.
 */
public class XMLCompress {

    /**
     * Removes the whitespace (spaces, tabs, line breaks) that separates adjacent tags, plus any
     * leading or trailing whitespace of the whole document.
     *
     * <p>Only whitespace runs located between a {@code >} and the next {@code <} are removed.
     * Spaces inside a tag (for example between attributes) and inside text content are kept,
     * because deleting them would corrupt the document.
     *
     * <p>NOTE(review): this is a textual transformation; a {@code >}-whitespace-{@code <}
     * sequence inside a CDATA section or a comment would be collapsed too.
     *
     * @param str the XML text to compact; must not be {@code null}
     * @return the compacted XML text
     */
    public static String xmlCompress(String str) {
        return str.trim().replaceAll(">\\s+<", "><");
    }

    /**
     * Demo entry point: compacts a sample document and compares its UTF-8 byte length with a
     * hand-compacted reference.
     */
    public static void main(String[] args) throws Exception {
        String xmlString = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
                "<Root>\n" +
                " <Head>\n" +
                " <enterid>NPJPZH</enterid>\n" +
                " <txdate>20230710</txdate>\n" +
                " <seqno>20230711105728361668</seqno>\n" +
                " <inflag>BM</inflag>\n" +
                " <termno>1</termno>\n" +
                " <signtxt></signtxt>\n" +
                " </Head>\n" +
                " <Body>\n" +
                " <acctname1>测试华</acctname1>\n" +
                " <acctno1>6229920200000003642</acctno1>\n" +
                " <curry>01</curry>\n" +
                " <txamt>1.00</txamt>\n" +
                " <busitype>2</busitype>\n" +
                " <remark>备注:(主体和转入帐号、户名不能为空)</remark>\n" +
                " <accttype1>bm</accttype1>\n" +
                " <accttype2>hzz</accttype2>\n" +
                " <acctname2>边民互助组一</acctname2>\n" +
                " <acctno2>172612010105613788</acctno2>\n" +
                " </Body>\n" +
                "</Root>";

        String compressedString = xmlCompress(xmlString);
        int compressedLength = compressedString.getBytes(StandardCharsets.UTF_8).length;
        System.out.println("解析:\n" + compressedString);
        System.out.println("位数:" + compressedLength);
        System.out.println("---------------------------------------------------------------------------------------------------------------------------------------------");

        // Hand-compacted reference document used for the length comparison.
        String test="<?xml version=\"1.0\" encoding=\"utf-8\"?><Root><Head><enterid>aaaaaa</enterid><txdate>20230710</txdate><seqno>20230711105728361668</seqno><inflag>BM</inflag><termno>1</termno><signtxt></signtxt></Head><Body><acctname1>测试华</acctname1><acctno1>6200000000000003642</acctno1><curry>01</curry><txamt>1.00</txamt><busitype>2</busitype><remark>备注:(主体和转入帐号、户名不能为空)</remark><accttype1>bm</accttype1><accttype2>hzz</accttype2><acctname2>边民互助组一</acctname2><acctno2>172612010105613788</acctno2></Body></Root>";
        int referenceLength = test.getBytes(StandardCharsets.UTF_8).length;
        System.out.println("真压缩:\n"+test);
        System.out.println(referenceLength);
        System.out.println("比较:"+(referenceLength == compressedLength));
    }
}
版权声明:本文为MAPLE__f原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。