java - Special characters in xml when transforming from document to clob -


I pass XML in a CLOB to an Oracle Java source, which signs the XML and returns the result, but the returned result contains special characters (numeric character references). For example:

input xml:

<a>žė</a> 

output xml:

<a>&#382;&#279;</a> 

If I try to print out the result in Java there are no special characters, but in Oracle there are. If I get the character stream from the first CLOB and assign the result to the output CLOB, there are no special characters either; they appear only when I transform the document to a CLOB.

Code to reproduce:

create or replace and compile java source named test_encoding as
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.BufferedReader;
import java.io.Writer;
import java.io.StringWriter;
import java.security.*;
import java.security.cert.X509Certificate;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import sun.misc.BASE64Encoder;
import sun.misc.BASE64Decoder;

/**
 * Minimal reproduction: parses the XML held in a CLOB into a DOM document,
 * serialises the document again with a JAXP Transformer and writes the
 * serialised text into a second CLOB.
 */
public class test_encoding {

  /**
   * Parses {@code inputclob} as XML, serialises the resulting document and
   * writes the text into {@code outputclob} starting at position 1.
   *
   * @param inputclob  CLOB holding the source XML
   * @param outputclob CLOB that receives the serialised XML
   * @return the same {@code outputclob} instance that was passed in
   * @throws Exception if parsing, transforming or writing fails
   */
  public static Clob getxml(Clob inputclob, Clob outputclob) throws Exception {
    Document document = getdocument(inputclob);

    // Alternatives kept from the original reproduction (they bypass the DOM round trip):
    //outputclob = inputclob;
    //String inputstring = getstringfromis(readertoinputstream(inputclob.getCharacterStream()));
    String inputstring = getstringfromis(getinputstreamfromdocument(document));

    Writer writer = outputclob.setCharacterStream(1);
    try {
      writer.write(inputstring.toCharArray());
    } finally {
      // Close even when the write fails so the CLOB writer is never left open.
      writer.close();
    }

    return outputclob;
  }

  /**
   * Reads {@code reader} to the end and exposes the characters as a stream
   * of bytes encoded with the "utf-8" charset.
   *
   * @param reader character source; it is read fully but not closed here
   * @return an in-memory stream over the encoded bytes
   * @throws Exception if reading or encoding fails
   */
  public static InputStream readertoinputstream(Reader reader) throws Exception {
    char[] charbuffer = new char[8 * 1024];
    StringBuilder builder = new StringBuilder();
    int numcharsread;
    while ((numcharsread = reader.read(charbuffer, 0, charbuffer.length)) != -1) {
      builder.append(charbuffer, 0, numcharsread);
    }

    return new ByteArrayInputStream(builder.toString().getBytes("utf-8"));
  }

  /**
   * Decodes {@code is} as "utf-8" text and returns all lines joined together.
   * Lines are read with {@code readLine()}, so line terminators are not
   * part of the returned string.
   *
   * @param is byte stream to decode
   * @return the concatenated lines, or an empty string for an empty stream
   * @throws Exception if reading or decoding fails
   */
  public static String getstringfromis(InputStream is) throws Exception {
    BufferedReader in = new BufferedReader(new InputStreamReader(is, "utf-8"));
    try {
      // StringBuilder avoids re-copying the whole text for every line.
      StringBuilder str = new StringBuilder();
      String line;
      while ((line = in.readLine()) != null) {
        str.append(line);
      }

      return str.toString();
    } finally {
      in.close();
    }
  }

  /**
   * Parses the XML stored in {@code xmlclob} with a namespace-aware parser.
   *
   * @param xmlclob CLOB holding the XML text
   * @return the parsed DOM document
   * @throws Exception if the CLOB cannot be read or the XML cannot be parsed
   */
  public static Document getdocument(Clob xmlclob) throws Exception {
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setNamespaceAware(true);

    return dbf.newDocumentBuilder().parse(readertoinputstream(xmlclob.getCharacterStream()));
  }

  /**
   * Serialises {@code element} with a default Transformer.
   *
   * @param element DOM element to serialise
   * @return an in-memory stream over the serialised bytes
   * @throws Exception if the transformation fails
   */
  public static InputStream getinputstreamfromelement(Element element) throws Exception {
    return serialize(element);
  }

  /**
   * Serialises {@code document} with a default Transformer.
   *
   * @param document DOM document to serialise
   * @return an in-memory stream over the serialised bytes
   * @throws Exception if the transformation fails
   */
  public static InputStream getinputstreamfromdocument(Document document) throws Exception {
    return serialize(document);
  }

  /** Runs a default (identity) Transformer over {@code node} into a byte buffer. */
  private static InputStream serialize(Node node) throws Exception {
    ByteArrayOutputStream outputstream = new ByteArrayOutputStream();
    Source xmlsource = new DOMSource(node);
    Result outputtarget = new StreamResult(outputstream);
    TransformerFactory.newInstance().newTransformer().transform(xmlsource, outputtarget);
    InputStream is = new ByteArrayInputStream(outputstream.toByteArray());

    return is;
  }

}
/

plsql function:

-- Call specification that publishes the Java method test_encoding.getxml
-- as a PL/SQL function taking two CLOBs and returning a CLOB.
create or replace function test_encoding(
  p_input_clob clob,
  p_output_clop clob)
  --return varchar2
  return clob
  --language java name 'signxmlnew.signxml(java.sql.clob, java.lang.string, java.lang.string, java.lang.string, java.lang.string) return java.lang.string';
  -- Java type names inside the call spec are case-sensitive.
  as language java name 'test_encoding.getxml(java.sql.Clob, java.sql.Clob) return java.sql.Clob';
/

script:

-- Driver script: prints the input XML, passes it through test_encoding
-- and prints what comes back.
declare
  l_input_xml clob := xmltype('<a>žė</a>').getclobval();
  l_output_xml clob;
begin
  dbms_lob.createtemporary(l_output_xml, false);
  dbms_output.put_line(l_input_xml);
  l_output_xml := test_encoding(l_input_xml, l_output_xml);
  dbms_output.put_line(l_output_xml);

  -- Release the temporary LOB instead of leaving it allocated.
  if dbms_lob.istemporary(l_output_xml) = 1 then
    dbms_lob.freetemporary(l_output_xml);
  end if;
end;
/

update

I've pinpointed the location where the characters "žė" are converted to HTML character references: it's when transforming the document. I've transformed the document to a byte[] and printed its hexadecimal representation, and the HTML special characters are there. I've done the same in Eclipse and there are no HTML characters there. For example:

Result in Oracle:  3c613e26233338323b26233237393b3c2f613e
Result in Eclipse: 3c613ec5bec4973c2f613e

I'm using the same functions to convert the document to a byte[] and to print out the hexadecimal representation of the byte array, both in Oracle and in Eclipse.

Function to convert the document to a byte[]:

public static byte[] getdocumentbytearray(document doc) throws exception {     bytearrayoutputstream outputstream = new bytearrayoutputstream();     source xmlsource = new domsource(doc);     transformer transformer = transformerfactory.newinstance().newtransformer();     transformer.setoutputproperty(outputkeys.omit_xml_declaration, "yes");     transformer.setoutputproperty(outputkeys.encoding, "utf-8");     result outputtarget = new streamresult(outputstream);     transformer.transform(xmlsource, outputtarget);      return outputstream.tobytearray(); }  public static string bytestohex(byte[] bytes) {     char[] hexarray = "0123456789abcdef".tochararray();     char[] hexchars = new char[bytes.length * 2];     (int j = 0; j < bytes.length; j++) {         int v = bytes[j] & 0xff;         hexchars[j * 2] = hexarray[v >>> 4];         hexchars[j * 2 + 1] = hexarray[v & 0x0f];     }     return new string(hexchars); } 

Could it be that the transformer takes the locale encoding? For example, in Eclipse it takes the encoding from the PC's NLS_LANG and in Oracle it takes the Oracle server's NLS_LANG. If so, how can I specify which encoding to use, because it seems that

// Requests "utf-8" as the Transformer's output encoding.
transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");

doesn't do the trick.

It seems there's a bug with the transformer when using it in Oracle. We've figured out a workaround for this, using DOMParser, XMLDocument and XMLElement. If it's too much work to rewrite the code using these, you can transform the XML using them at the end, before returning the value. For example:

...
InputStream elementis = getinputstreamfromelement(soapenvelope);
retclob = setcharacterstream(elementis, retclob);

return retclob;

/**
 * Serialises {@code element} with a default Transformer.
 *
 * @param element DOM element to serialise
 * @return an in-memory stream over the serialised bytes
 * @throws Exception if the transformation fails
 */
public static InputStream getinputstreamfromelement(Element element) throws Exception {
    ByteArrayOutputStream outputstream = new ByteArrayOutputStream();
    Source xmlsource = new DOMSource(element);
    Result outputtarget = new StreamResult(outputstream);
    TransformerFactory.newInstance().newTransformer().transform(xmlsource, outputtarget);
    InputStream is = new ByteArrayInputStream(outputstream.toByteArray());

    return is;
}

/**
 * Re-parses the serialised XML and prints its document element into
 * {@code clob} starting at position 1.
 *
 * NOTE(review): DOMParser, XMLDocument and XMLElement presumably come from
 * oracle.xml.parser.v2 (Oracle XDK) - confirm and import accordingly.
 *
 * @param inputstream serialised XML to re-parse
 * @param clob        CLOB that receives the printed XML
 * @return the same {@code clob} instance that was passed in
 * @throws Exception if parsing or writing fails
 */
public static Clob setcharacterstream(InputStream inputstream, Clob clob) throws Exception {
    DOMParser parser = new DOMParser();
    parser.parse(inputstream);
    XMLDocument xmldocument = parser.getDocument();
    XMLElement xmlpayload = (XMLElement) xmldocument.getDocumentElement();
    Writer writer = clob.setCharacterStream(1);
    try {
        xmlpayload.print(writer);
    } finally {
        // Close the CLOB writer, as getxml does, so it is never left open.
        writer.close();
    }

    return clob;
}

Comments

Popular posts from this blog

php - How to add and update images or image url in Volusion using Volusion API -

Laravel mail error `Swift_TransportException in StreamBuffer.php line 269: Connection could not be established with host smtp.gmail.com [ #0]` -

c# SetCompatibleTextRenderingDefault must be called before the first -