Writing an example set as xml

data123data123 Member Posts: 20  Maven
edited December 2018 in Help

Hi,

I'd appreciate any help in writing an example set to xml.

I've tried the process below (as posted by Helge, http://rapid-i.com/rapidforum/index.php?topic=8256.0), but it doesn't combine the documents; instead, it produces only 1 record.

 


<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process: retrieves the Golf sample data, loops over its examples and,
     inside that, over its attributes, building a text document with XML tags that is
     then passed to Write Document. -->
<process version="6.0.008">
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="6.0.008" expanded="true" name="Process">
   <process expanded="true">
     <operator activated="true" class="retrieve" compatibility="6.0.008" expanded="true" height="60" name="Retrieve Golf" width="90" x="45" y="210">
       <parameter key="repository_entry" value="//Samples/data/Golf"/>
     </operator>
     <!-- Outer loop over the examples. The subprocess uses the %{example} macro both as the
          example index for Extract Document and in the output file name. -->
     <operator activated="true" class="loop_examples" compatibility="6.0.008" expanded="true" height="94" name="Loop Examples" width="90" x="246" y="210">
       <process expanded="true">
         <!-- Inner loop over the attributes. Each iteration combines an opening tag, the
              extracted value and a closing tag, all named after %{loop_attribute}. -->
         <operator activated="true" class="loop_attributes" compatibility="6.0.008" expanded="true" height="94" name="Loop Attributes" width="90" x="246" y="120">
           <process expanded="true">
             <operator activated="true" class="text:extract_document" compatibility="5.3.002" expanded="true" height="76" name="Extract Document" width="90" x="246" y="165">
               <parameter key="attribute_name" value="%{loop_attribute}"/>
               <parameter key="example_index" value="%{example}"/>
             </operator>
             <!-- NOTE(review): "CLose Tag (2)" and "CLose Tag (3)" are not referenced by any
                  connect element in this subprocess, so their output is unused. -->
             <operator activated="true" class="text:create_document" compatibility="5.3.002" expanded="true" height="60" name="CLose Tag (2)" width="90" x="246" y="615">
               <parameter key="text" value="&lt;/%{loop_attribute}&gt;&#10;"/>
             </operator>
             <operator activated="true" class="text:create_document" compatibility="5.3.002" expanded="true" height="60" name="CLose Tag (3)" width="90" x="514" y="660">
               <parameter key="text" value="&lt;/%{loop_attribute}&gt;&#10;"/>
             </operator>
             <operator activated="true" class="text:create_document" compatibility="5.3.002" expanded="true" height="60" name="Open Tag" width="90" x="246" y="75">
               <parameter key="text" value="      &lt;%{loop_attribute}&gt;"/>
             </operator>
             <operator activated="true" class="text:create_document" compatibility="5.3.002" expanded="true" height="60" name="CLose Tag" width="90" x="246" y="255">
               <parameter key="text" value="&lt;/%{loop_attribute}&gt;&#10;"/>
             </operator>
             <operator activated="true" class="text:combine_documents" compatibility="5.3.002" expanded="true" height="112" name="Combine Documents" width="90" x="447" y="165"/>
             <connect from_port="example set" to_op="Extract Document" to_port="example set"/>
             <connect from_op="Extract Document" from_port="document" to_op="Combine Documents" to_port="documents 2"/>
             <connect from_op="Open Tag" from_port="output" to_op="Combine Documents" to_port="documents 1"/>
             <connect from_op="CLose Tag" from_port="output" to_op="Combine Documents" to_port="documents 3"/>
             <connect from_op="Combine Documents" from_port="document" to_port="result 1"/>
             <portSpacing port="source_example set" spacing="0"/>
             <portSpacing port="sink_example set" spacing="0"/>
             <portSpacing port="sink_result 1" spacing="0"/>
             <portSpacing port="sink_result 2" spacing="0"/>
           </process>
         </operator>
         <!-- The XML declaration plus the opening and closing "data" tags are wrapped around
              the output of Loop Attributes. -->
         <operator activated="true" class="text:create_document" compatibility="5.3.002" expanded="true" height="60" name="Close XML" width="90" x="246" y="255">
           <parameter key="text" value="&lt;/data&gt;"/>
         </operator>
         <operator activated="true" class="text:create_document" compatibility="5.3.002" expanded="true" height="60" name="Begin XML" width="90" x="246" y="30">
           <parameter key="text" value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;utf-8&quot;?&gt;&#10;&lt;data&gt;&#10;"/>
         </operator>
         <operator activated="true" class="text:combine_documents" compatibility="5.3.002" expanded="true" height="112" name="Combine Documents (2)" width="90" x="447" y="120"/>
         <!-- Write Document sits inside Loop Examples and its file name contains %{example}.
              NOTE(review): presumably this yields one file per example instead of a single
              combined file; confirm against the observed output. -->
         <operator activated="true" class="text:write_document" compatibility="5.3.002" expanded="true" height="76" name="Write Document" width="90" x="581" y="120">
           <parameter key="file" value="C:\Users\hhomburg\Documents\out_%{example}.xml"/>
         </operator>
         <connect from_port="example set" to_op="Loop Attributes" to_port="example set"/>
         <connect from_op="Loop Attributes" from_port="result 1" to_op="Combine Documents (2)" to_port="documents 2"/>
         <connect from_op="Close XML" from_port="output" to_op="Combine Documents (2)" to_port="documents 3"/>
         <connect from_op="Begin XML" from_port="output" to_op="Combine Documents (2)" to_port="documents 1"/>
         <connect from_op="Combine Documents (2)" from_port="document" to_op="Write Document" to_port="document"/>
         <connect from_op="Write Document" from_port="document" to_port="output 1"/>
         <portSpacing port="source_example set" spacing="0"/>
         <portSpacing port="sink_example set" spacing="0"/>
         <portSpacing port="sink_output 1" spacing="0"/>
         <portSpacing port="sink_output 2" spacing="0"/>
       </process>
     </operator>
     <connect from_op="Retrieve Golf" from_port="output" to_op="Loop Examples" to_port="example set"/>
     <connect from_op="Loop Examples" from_port="output 1" to_port="result 1"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="0"/>
   </process>
 </operator>
</process>

 

Cheers

 

Answers

  • SGolbertSGolbert RapidMiner Certified Analyst, Member Posts: 336   Unicorn

    Hi,

     

    I extended the process a bit and now I obtain an .xml file that I can read with the Read XML operator.

     

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- RapidMiner process: serialises the Golf sample data into a single XML file with the
         layout examples/example/attribute, then reads that file back with Read XML.
         The leading indentation of this listing comes from the forum quote; strip it before
         importing so that nothing precedes the XML declaration. -->
    <process version="8.2.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.2.001" expanded="true" name="Process">
        <process expanded="true">
          <operator activated="true" class="retrieve" compatibility="8.2.001" expanded="true" height="68" name="Retrieve Golf" width="90" x="45" y="210">
            <parameter key="repository_entry" value="//Samples/data/Golf"/>
          </operator>
          <!-- Outer loop over the examples: each iteration emits one document wrapped in
               "example" tags. -->
          <operator activated="true" class="loop_examples" compatibility="8.2.001" expanded="true" height="103" name="Loop Examples" width="90" x="246" y="210">
            <process expanded="true">
              <!-- Inner loop over the attributes (special attributes included): each iteration
                   wraps the extracted value in tags named after %{loop_attribute}. -->
              <operator activated="true" class="loop_attributes" compatibility="8.2.001" expanded="true" height="103" name="Loop Attributes" width="90" x="246" y="120">
                <parameter key="include_special_attributes" value="true"/>
                <process expanded="true">
                  <operator activated="true" class="text:extract_document" compatibility="8.1.000" expanded="true" height="76" name="Extract Document" width="90" x="246" y="165">
                    <parameter key="attribute_name" value="%{loop_attribute}"/>
                    <parameter key="example_index" value="%{example}"/>
                  </operator>
                  <!-- The unconnected duplicates "CLose Tag (2)" and "CLose Tag (3)" were removed:
                       no connect element referenced them, so their output was never used. -->
                  <operator activated="true" class="text:create_document" compatibility="8.1.000" expanded="true" height="60" name="Open Tag" width="90" x="246" y="75">
                    <parameter key="text" value=" &lt;%{loop_attribute}&gt;"/>
                  </operator>
                  <operator activated="true" class="text:create_document" compatibility="8.1.000" expanded="true" height="60" name="CLose Tag" width="90" x="246" y="255">
                    <parameter key="text" value="&lt;/%{loop_attribute}&gt;&#10;"/>
                  </operator>
                  <operator activated="true" class="text:combine_documents" compatibility="8.1.000" expanded="true" height="112" name="Combine Documents" width="90" x="447" y="165"/>
                  <connect from_port="example set" to_op="Extract Document" to_port="example set"/>
                  <connect from_op="Extract Document" from_port="document" to_op="Combine Documents" to_port="documents 2"/>
                  <connect from_op="Open Tag" from_port="output" to_op="Combine Documents" to_port="documents 1"/>
                  <connect from_op="CLose Tag" from_port="output" to_op="Combine Documents" to_port="documents 3"/>
                  <connect from_op="Combine Documents" from_port="document" to_port="result 1"/>
                  <portSpacing port="source_example set" spacing="0"/>
                  <portSpacing port="sink_example set" spacing="0"/>
                  <portSpacing port="sink_result 1" spacing="0"/>
                  <portSpacing port="sink_result 2" spacing="0"/>
                </process>
              </operator>
              <operator activated="true" class="text:create_document" compatibility="8.1.000" expanded="true" height="68" name="Begin XML (2)" width="90" x="313" y="34">
                <parameter key="text" value="&lt;example&gt;&#10;"/>
              </operator>
              <operator activated="true" class="text:create_document" compatibility="8.1.000" expanded="true" height="68" name="Close XML (2)" width="90" x="380" y="289">
                <parameter key="text" value="&lt;/example&gt;"/>
              </operator>
              <operator activated="true" class="text:combine_documents" compatibility="8.1.000" expanded="true" height="124" name="Combine Documents (2)" width="90" x="514" y="136"/>
              <connect from_port="example set" to_op="Loop Attributes" to_port="example set"/>
              <connect from_op="Loop Attributes" from_port="result 1" to_op="Combine Documents (2)" to_port="documents 2"/>
              <connect from_op="Begin XML (2)" from_port="output" to_op="Combine Documents (2)" to_port="documents 1"/>
              <connect from_op="Close XML (2)" from_port="output" to_op="Combine Documents (2)" to_port="documents 3"/>
              <connect from_op="Combine Documents (2)" from_port="document" to_port="output 1"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- The XML declaration and the "examples" root tags are added once, outside the
               loop, and the combined document is written to a single file. -->
          <operator activated="true" class="text:create_document" compatibility="8.1.000" expanded="true" height="68" name="Begin XML" width="90" x="313" y="34">
            <parameter key="text" value="&lt;?xml version=&quot;1.0&quot; encoding=&quot;utf-8&quot;?&gt;&#10;&lt;examples&gt;&#10;"/>
          </operator>
          <operator activated="true" class="text:create_document" compatibility="8.1.000" expanded="true" height="68" name="Close XML" width="90" x="447" y="289">
            <parameter key="text" value="&lt;/examples&gt;"/>
          </operator>
          <operator activated="true" class="text:combine_documents" compatibility="8.1.000" expanded="true" height="124" name="Combine Documents (3)" width="90" x="581" y="136"/>
          <operator activated="true" class="text:write_document" compatibility="8.1.000" expanded="true" height="82" name="Write Document" width="90" x="715" y="136">
            <parameter key="file" value="C:\Users\sgolbert\Desktop\test.xml"/>
          </operator>
          <!-- Read XML loads the same file that Write Document produces; adjust both paths
               together. NOTE(review): there is no port connection between the two operators,
               so this presumably relies on operator order to run after the write; confirm. -->
          <operator activated="true" class="advanced_file_connectors:read_xml" compatibility="8.2.001" expanded="true" height="68" name="Read XML" width="90" x="782" y="289">
            <parameter key="file" value="C:\Users\sgolbert\Desktop\test.xml"/>
            <parameter key="xpath_for_examples" value="//examples/example"/>
            <enumeration key="xpaths_for_attributes">
              <parameter key="xpath_for_attribute" value="Outlook[1]/text()"/>
              <parameter key="xpath_for_attribute" value="Temperature[1]/text()"/>
              <parameter key="xpath_for_attribute" value="Humidity[1]/text()"/>
              <parameter key="xpath_for_attribute" value="Wind[1]/text()"/>
              <parameter key="xpath_for_attribute" value="Play[1]/text()"/>
            </enumeration>
            <list key="namespaces"/>
            <parameter key="use_default_namespace" value="false"/>
            <list key="annotations"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="Outlook[1]/text().true.nominal.attribute"/>
              <parameter key="1" value="Temperature[1]/text().true.nominal.attribute"/>
              <parameter key="2" value="Humidity[1]/text().true.nominal.attribute"/>
              <parameter key="3" value="Wind[1]/text().true.nominal.attribute"/>
              <parameter key="4" value="Play[1]/text().true.nominal.attribute"/>
            </list>
          </operator>
          <connect from_op="Retrieve Golf" from_port="output" to_op="Loop Examples" to_port="example set"/>
          <connect from_op="Loop Examples" from_port="output 1" to_op="Combine Documents (3)" to_port="documents 2"/>
          <connect from_op="Begin XML" from_port="output" to_op="Combine Documents (3)" to_port="documents 1"/>
          <connect from_op="Close XML" from_port="output" to_op="Combine Documents (3)" to_port="documents 3"/>
          <connect from_op="Combine Documents (3)" from_port="document" to_op="Write Document" to_port="document"/>
          <connect from_op="Write Document" from_port="document" to_port="result 1"/>
          <connect from_op="Read XML" from_port="output" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>

    I hope it helps!

     

    Regards,

    Sebastian

  • MaerkliMaerkli Member Posts: 84   Unicorn

    Hallo Sebastian,

    May I ask you, please, to publish the results you obtain with your process?

    Merci,

    Maerkli

  • SGolbertSGolbert RapidMiner Certified Analyst, Member Posts: 336   Unicorn

    Hi Maerkli,

     

    Are you referring to the results of Read XML?

     

    [Screenshot attachment: Unbenannt.PNG]

     

    Since it uses the Golf dataset, you can test it on your computer. It would be easy to rename the attributes.

     

    Regards,

    Sebastian

     

    yyhuang
Sign In or Register to comment.