Java - Special characters in XML when transforming from Document to CLOB
I pass XML in a CLOB to an Oracle Java source, which signs the XML and returns the result, but the returned result contains special characters (numeric character references). For example:
input xml:
<a>žė</a>
output xml:
<a>&#382;&#279;</a>
If I print out the result in Java there are no special characters; in Oracle there are. If I take the character stream of the first CLOB and assign it to the result CLOB, there are no special characters either; they appear only when I transform the Document to a CLOB.
Code to reproduce:
create or replace and compile java source named test_encoding as
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.BufferedReader;
import java.io.Writer;
import java.io.StringWriter;
import java.security.*;
import java.security.cert.X509Certificate;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import sun.misc.BASE64Encoder;
import sun.misc.BASE64Decoder;

/**
 * Minimal reproduction: parses the XML held in one CLOB into a DOM document, serializes the
 * document again and writes the serialized text into a second CLOB.
 *
 * <p>Method names are intentionally left exactly as published (all lower case) because the
 * PL/SQL call spec binds to {@code test_encoding.getxml} by its exact, case-sensitive name.
 *
 * <p>The code sticks to pre-Java-7 constructs (no try-with-resources, no
 * {@code StandardCharsets}); NOTE(review): presumably the database JVM is an older Java
 * release, confirm before modernizing.
 */
public class test_encoding {

  /** Charset used for every byte/char conversion in this class. */
  private static final String UTF_8 = "UTF-8";

  /**
   * Round-trips the XML in {@code inputClob} through a DOM document and stores the serialized
   * result in {@code outputClob}.
   *
   * @param inputClob CLOB holding the XML text to parse
   * @param outputClob writable CLOB that receives the serialized XML, starting at position 1
   * @return {@code outputClob}, after the XML has been written to it
   * @throws Exception if parsing, serializing or LOB access fails
   */
  public static Clob getxml(Clob inputClob, Clob outputClob) throws Exception {
    Document document = getdocument(inputClob);
    String xml = getstringfromis(getinputstreamfromdocument(document));
    Writer writer = outputClob.setCharacterStream(1);
    try {
      writer.write(xml);
    } finally {
      // Close even when the write fails so the LOB stream is not left open.
      writer.close();
    }
    return outputClob;
  }

  /**
   * Drains {@code reader} and returns its content as a UTF-8 encoded byte stream.
   *
   * @param reader character source; it is read to the end but not closed here
   * @return in-memory stream over the UTF-8 bytes of everything read
   * @throws Exception if reading fails
   */
  public static InputStream readertoinputstream(Reader reader) throws Exception {
    char[] charBuffer = new char[8 * 1024];
    StringBuilder builder = new StringBuilder();
    int numCharsRead;
    while ((numCharsRead = reader.read(charBuffer, 0, charBuffer.length)) != -1) {
      builder.append(charBuffer, 0, numCharsRead);
    }
    return new ByteArrayInputStream(builder.toString().getBytes(UTF_8));
  }

  /**
   * Decodes the whole stream as UTF-8 text.
   *
   * <p>The text is copied character by character rather than line by line, so line terminators
   * inside the XML are preserved instead of being silently dropped.
   *
   * @param is byte stream containing UTF-8 text; it is closed when this method returns
   * @return the decoded text, or an empty string for an empty stream
   * @throws Exception if reading fails
   */
  public static String getstringfromis(InputStream is) throws Exception {
    Reader in = new InputStreamReader(is, UTF_8);
    try {
      StringBuilder text = new StringBuilder();
      char[] buffer = new char[8 * 1024];
      int read;
      while ((read = in.read(buffer, 0, buffer.length)) != -1) {
        text.append(buffer, 0, read);
      }
      return text.toString();
    } finally {
      in.close();
    }
  }

  /**
   * Parses the XML text stored in {@code xmlClob} into a namespace-aware DOM document.
   *
   * @param xmlClob CLOB holding the XML text
   * @return the parsed document
   * @throws Exception if the CLOB cannot be read or the XML is not well-formed
   */
  public static Document getdocument(Clob xmlClob) throws Exception {
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setNamespaceAware(true);
    return dbf.newDocumentBuilder().parse(readertoinputstream(xmlClob.getCharacterStream()));
  }

  /**
   * Serializes {@code element} (and its subtree) and returns the bytes as a stream.
   *
   * @param element DOM element to serialize
   * @return in-memory stream over the serialized XML
   * @throws Exception if the transformation fails
   */
  public static InputStream getinputstreamfromelement(Element element) throws Exception {
    return serialize(element);
  }

  /**
   * Serializes the whole {@code document} and returns the bytes as a stream.
   *
   * @param document DOM document to serialize
   * @return in-memory stream over the serialized XML
   * @throws Exception if the transformation fails
   */
  public static InputStream getinputstreamfromdocument(Document document) throws Exception {
    return serialize(document);
  }

  /**
   * Runs an identity transformation of {@code node} into a byte buffer.
   *
   * <p>The output encoding is requested explicitly so that the bytes agree with the UTF-8
   * decoding done in {@link #getstringfromis(InputStream)}. NOTE(review): the accompanying
   * write-up reports that inside the Oracle JVM the transformer still emits numeric character
   * references for non-ASCII text even with this property set.
   */
  private static InputStream serialize(Node node) throws Exception {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    transformer.setOutputProperty(OutputKeys.ENCODING, UTF_8);
    transformer.transform(new DOMSource(node), new StreamResult(outputStream));
    return new ByteArrayInputStream(outputStream.toByteArray());
  }
}
/
PL/SQL function:
-- PL/SQL call spec that publishes the Java method test_encoding.getxml as a SQL-callable
-- function: takes the input XML CLOB and a writable output CLOB, returns the resulting CLOB.
-- Java type names in the call spec are case-sensitive and must be spelled java.sql.Clob.
create or replace function test_encoding(
    p_input_clob  clob,
    p_output_clop clob)
    --return varchar2
    return clob
as
    --language java name 'signxmlnew.signxml(java.sql.clob, java.lang.string, java.lang.string, java.lang.string, java.lang.string) return java.lang.string';
    language java name 'test_encoding.getxml(java.sql.Clob, java.sql.Clob) return java.sql.Clob';
/
script:
-- Driver script: sends a small XML document with non-ASCII characters through
-- test_encoding and prints the text before and after the round trip.
declare
    l_input_xml  clob := xmltype('<a>žė</a>').getclobval();
    l_output_xml clob;
begin
    -- The Java side writes into this LOB, so it must exist before the call.
    dbms_lob.createtemporary(l_output_xml, false);
    dbms_output.put_line(l_input_xml);
    l_output_xml := test_encoding(l_input_xml, l_output_xml);
    dbms_output.put_line(l_output_xml);
    -- Release the temporary LOB instead of leaving it allocated.
    if dbms_lob.istemporary(l_output_xml) = 1 then
        dbms_lob.freetemporary(l_output_xml);
    end if;
end;
/
update
I've pinpointed the location where the characters "žė" are converted to HTML character references: it's when transforming the Document. I've transformed the Document to a byte[] and printed the hexadecimal representation of it, and the HTML character references are there. I've done the same in Eclipse, and there are no HTML character references there. For example:
result in oracle: 3c613e26233338323b26233237393b3c2f613e result in eclipse: 3c613ec5bec4973c2f613e
I'm using the same functions to convert the Document to a byte[] and to print out the hexadecimal representation of the byte array, both in Oracle and in Eclipse.
Function to convert a Document to a byte[]:
public static byte[] getdocumentbytearray(document doc) throws exception { bytearrayoutputstream outputstream = new bytearrayoutputstream(); source xmlsource = new domsource(doc); transformer transformer = transformerfactory.newinstance().newtransformer(); transformer.setoutputproperty(outputkeys.omit_xml_declaration, "yes"); transformer.setoutputproperty(outputkeys.encoding, "utf-8"); result outputtarget = new streamresult(outputstream); transformer.transform(xmlsource, outputtarget); return outputstream.tobytearray(); } public static string bytestohex(byte[] bytes) { char[] hexarray = "0123456789abcdef".tochararray(); char[] hexchars = new char[bytes.length * 2]; (int j = 0; j < bytes.length; j++) { int v = bytes[j] & 0xff; hexchars[j * 2] = hexarray[v >>> 4]; hexchars[j * 2 + 1] = hexarray[v & 0x0f]; } return new string(hexchars); }
Could it be that the Transformer takes its encoding from the locale — for example, in Eclipse it takes the encoding from the PC's NLS_LANG, and in Oracle it takes the Oracle server's NLS_LANG? If so, how can I specify the encoding to use, because it seems that
transformer.setoutputproperty(outputkeys.encoding, "utf-8");
doesn't do the trick.
It seems there's a bug with the Transformer when using it in Oracle. We've figured out a workaround for this, using DOMParser, XMLDocument and XMLElement. If it's too much work to rewrite the code using these, you can just transform the XML using them at the end, before returning the value. For example:
...
InputStream elementis = getinputstreamfromelement(soapenvelope);
retclob = setcharacterstream(elementis, retclob);
return retclob;

/**
 * Serializes {@code element} (and its subtree) with the default identity transformer.
 *
 * @param element DOM element to serialize
 * @return in-memory stream over the serialized XML
 * @throws Exception if the transformation fails
 */
public static InputStream getinputstreamfromelement(Element element) throws Exception {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    Source xmlSource = new DOMSource(element);
    Result outputTarget = new StreamResult(outputStream);
    TransformerFactory.newInstance().newTransformer().transform(xmlSource, outputTarget);
    InputStream is = new ByteArrayInputStream(outputStream.toByteArray());
    return is;
}

/**
 * Workaround: re-parses the serialized XML with the Oracle XDK parser and lets the XDK element
 * print itself into the CLOB, instead of writing the transformer output directly.
 *
 * @param inputStream serialized XML to re-parse
 * @param clob writable CLOB that receives the XML, starting at position 1
 * @return {@code clob}, after the XML has been written to it
 * @throws Exception if parsing or LOB access fails
 */
public static Clob setcharacterstream(InputStream inputStream, Clob clob) throws Exception {
    DOMParser parser = new DOMParser();
    parser.parse(inputStream);
    XMLDocument xmlDocument = parser.getDocument();
    XMLElement xmlPayload = (XMLElement) xmlDocument.getDocumentElement();
    Writer writer = clob.setCharacterStream(1);
    try {
        xmlPayload.print(writer);
    } finally {
        // Closing flushes any buffered characters into the LOB and releases the stream.
        writer.close();
    }
    return clob;
}
Comments
Post a Comment