Options

[Solved] Error message: The given example sets are not compatible

JEdwardJEdward RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 578 Unicorn
edited November 2018 in Help
Hi,

I'm trying to use the Append operator to combine a collection following a loop.  
The loop creates an additional attribute named 'IDNum' then extracts this value within the loop and during an internal loop adds this macro value as an attribute to the dataset.  
Unfortunately, during this process it converts the datatype of the macro to Real (if it has data) or Nominal (if there is no example set returned).  This means that an Append operator following the macro doesn't combine the datasets, throwing an incompatibility error.  

How can I enforce the datatype in the Generate Attributes operator even if there are no examples in the example set?  

here is a sample process demonstrating the problem:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Sample process that reproduces the incompatibility described in the post.
  A document containing the text "1" is written to a file and read back with
  Read CSV as the attribute IDNum. Inside Loop Examples the IDNum value is
  stored in a macro, and an inner Loop writes that macro back as an attribute
  on two branches: one fed the generated data, one fed a filtered copy.
  According to the poster, the first branch yields a Real attribute and the
  second a Nominal one, so the two results cannot be appended.
-->
<process version="5.3.015">
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
   <process expanded="true">
     <!-- Input fixture: a document whose text is "1"; it is handed to Write Document and then to Read CSV as a file. -->
     <operator activated="true" class="text:create_document" compatibility="5.3.002" expanded="true" height="60" name="Create Document" width="90" x="45" y="75">
       <parameter key="text" value="1"/>
     </operator>
     <operator activated="true" class="text:write_document" compatibility="5.3.002" expanded="true" height="76" name="Write Document" width="90" x="179" y="75"/>
     <!-- The first row is not used as names; the meta data entry for column 0 names it IDNum and lists integer as its type. -->
     <operator activated="true" class="read_csv" compatibility="5.3.015" expanded="true" height="60" name="Read CSV" width="90" x="112" y="165">
       <parameter key="first_row_as_names" value="false"/>
       <list key="annotations"/>
       <list key="data_set_meta_data_information">
         <parameter key="0" value="IDNum.true.integer.regular"/>
       </list>
     </operator>
     <!-- Loop over the examples read from the CSV; the subprocess extracts the macro and then runs the inner Loop. -->
     <operator activated="true" class="loop_examples" compatibility="5.3.015" expanded="true" height="112" name="Loop Examples" width="90" x="246" y="165">
       <process expanded="true">
         <!-- Store the data value of attribute IDNum at index %{example} in a macro that is also called IDNum. -->
         <operator activated="true" class="extract_macro" compatibility="5.3.015" expanded="true" height="60" name="Extract Macro" width="90" x="45" y="30">
           <parameter key="macro" value="IDNum"/>
           <parameter key="macro_type" value="data_value"/>
           <parameter key="attribute_name" value="IDNum"/>
           <parameter key="example_index" value="%{example}"/>
           <list key="additional_macros"/>
         </operator>
         <!-- Inner Loop: has no connected input; it generates its own data and returns two example sets. -->
         <operator activated="true" class="loop" compatibility="5.3.015" expanded="true" height="94" name="Loop" width="90" x="179" y="30">
           <process expanded="true">
             <!-- Placeholder data with a single attribute, DeliberatelyNull, whose value expression is 1. -->
             <operator activated="true" class="generate_data_user_specification" compatibility="5.3.015" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="45" y="30">
               <list key="attribute_values">
                 <parameter key="DeliberatelyNull" value="1"/>
               </list>
               <list key="set_additional_roles"/>
             </operator>
             <!-- Multiply feeds the same data to both branches (output 1 to the filter, output 2 straight to IntegerBecomesReal). -->
             <operator activated="true" class="multiply" compatibility="5.3.015" expanded="true" height="94" name="Multiply" width="90" x="45" y="120"/>
             <!-- Inverted range filter on examples 1 to 1; presumably this leaves no examples, which is the "no data" case from the post. TODO confirm. -->
             <operator activated="true" class="filter_example_range" compatibility="5.3.015" expanded="true" height="76" name="Filter Example Range" width="90" x="246" y="165">
               <parameter key="first_example" value="1"/>
               <parameter key="last_example" value="1"/>
               <parameter key="invert_filter" value="true"/>
             </operator>
             <!-- Filtered branch: create attribute IDNum from the macro. The operator name records the Nominal type the poster observes here. -->
             <operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="IntegerBecomesNominal" width="90" x="380" y="165">
               <list key="function_descriptions">
                 <parameter key="IDNum" value="%{IDNum}"/>
               </list>
             </operator>
             <!-- Unfiltered branch: same expression. The operator name records the Real type the poster observes here. -->
             <operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="IntegerBecomesReal" width="90" x="380" y="30">
               <list key="function_descriptions">
                 <parameter key="IDNum" value="%{IDNum}"/>
               </list>
             </operator>
             <connect from_op="Generate Data by User Specification" from_port="output" to_op="Multiply" to_port="input"/>
             <connect from_op="Multiply" from_port="output 1" to_op="Filter Example Range" to_port="example set input"/>
             <connect from_op="Multiply" from_port="output 2" to_op="IntegerBecomesReal" to_port="example set input"/>
             <connect from_op="Filter Example Range" from_port="example set output" to_op="IntegerBecomesNominal" to_port="example set input"/>
             <connect from_op="IntegerBecomesNominal" from_port="example set output" to_port="output 2"/>
             <connect from_op="IntegerBecomesReal" from_port="example set output" to_port="output 1"/>
             <portSpacing port="source_input 1" spacing="0"/>
             <portSpacing port="sink_output 1" spacing="0"/>
             <portSpacing port="sink_output 2" spacing="0"/>
             <portSpacing port="sink_output 3" spacing="0"/>
           </process>
         </operator>
         <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
         <connect from_op="Loop" from_port="output 1" to_port="output 1"/>
         <connect from_op="Loop" from_port="output 2" to_port="output 2"/>
         <portSpacing port="source_example set" spacing="0"/>
         <portSpacing port="sink_example set" spacing="0"/>
         <portSpacing port="sink_output 1" spacing="0"/>
         <portSpacing port="sink_output 2" spacing="0"/>
         <portSpacing port="sink_output 3" spacing="0"/>
       </process>
     </operator>
     <connect from_op="Create Document" from_port="output" to_op="Write Document" to_port="document"/>
     <connect from_op="Write Document" from_port="file" to_op="Read CSV" to_port="file"/>
     <connect from_op="Read CSV" from_port="output" to_op="Loop Examples" to_port="example set"/>
     <!-- Results: 1 = the looped example set, 2 = the IntegerBecomesReal branch, 3 = the IntegerBecomesNominal branch. -->
     <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
     <connect from_op="Loop Examples" from_port="output 1" to_port="result 2"/>
     <connect from_op="Loop Examples" from_port="output 2" to_port="result 3"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="0"/>
     <portSpacing port="sink_result 3" spacing="0"/>
     <portSpacing port="sink_result 4" spacing="0"/>
   </process>
 </operator>
</process>

Answers

  • Options
    Marco_BoeckMarco_Boeck Administrator, Moderator, Employee, Member, University Professor Posts: 1,993 RM Engineering
    Hi,

    you can use the str() or parse() functions. See the following example process:

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Example of converting an attribute with the str() and parse() expression
      functions: a new attribute is derived from a1 with str(), and a second new
      attribute is derived from that one with parse(). The attribute names chosen
      here (newNominalAtt, newNumericalAtt) indicate the intended result types.
    -->
    <process version="6.0.004">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="6.0.004" expanded="true" name="Process">
        <process expanded="true">
          <!-- Load the Iris sample data from the repository. -->
          <operator activated="true" class="retrieve" compatibility="6.0.004" expanded="true" height="60" name="Retrieve Iris" width="90" x="45" y="30">
            <parameter key="repository_entry" value="//Samples/data/Iris"/>
          </operator>
          <!-- Declare the numeric value 5.1 as missing in the single attribute a1. -->
          <operator activated="true" class="declare_missing_value" compatibility="6.0.004" expanded="true" height="76" name="Declare Missing Value" width="90" x="179" y="30">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="a1"/>
            <parameter key="numeric_value" value="5.1"/>
          </operator>
          <!-- Step 1: newNominalAtt = str(a1). A breakpoint is set after this operator. -->
          <operator activated="true" breakpoints="after" class="generate_attributes" compatibility="6.0.004" expanded="true" height="76" name="Generate Attributes" width="90" x="313" y="30">
            <list key="function_descriptions">
              <parameter key="newNominalAtt" value="str(a1)"/>
            </list>
          </operator>
          <!-- Step 2: newNumericalAtt = parse(newNominalAtt), applied to the attribute created in step 1. -->
          <operator activated="true" class="generate_attributes" compatibility="6.0.004" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="447" y="30">
            <list key="function_descriptions">
              <parameter key="newNumericalAtt" value="parse(newNominalAtt)"/>
            </list>
          </operator>
          <connect from_op="Retrieve Iris" from_port="output" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    Regards,
    Marco
  • Options
    JEdwardJEdward RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 578 Unicorn
    Hi Marco,

    Unfortunately I had already tried that method and it doesn't work in my example because I don't know at the beginning whether my attribute is going to be nominal or not. 
    str(value) and parse(value) throw an error if they get the wrong type of data field, e.g. if the function thinks it's receiving nominal and not numeric. 

    However, whilst you were posting I discovered that using the operator Parse Numbers does do the job as it seems to be more tolerant. 
    See below my reworked example process:
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Reworked sample process. The setup is the same as in the original sample
      (document "1" read via Read CSV as IDNum, macro extracted per example,
      attribute re-created from the macro on two branches), but each branch now
      passes its result through Parse Numbers on IDNum and then Real to Integer
      before leaving the inner Loop. The poster reports that Parse Numbers
      tolerates both incoming types, which str() and parse() did not.
    -->
    <process version="5.3.015">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
        <process expanded="true">
          <!-- Input fixture: a document whose text is "1"; it is handed to Write Document and then to Read CSV as a file. -->
          <operator activated="true" class="text:create_document" compatibility="5.3.002" expanded="true" height="60" name="Create Document" width="90" x="45" y="75">
            <parameter key="text" value="1"/>
          </operator>
          <operator activated="true" class="text:write_document" compatibility="5.3.002" expanded="true" height="76" name="Write Document" width="90" x="179" y="75"/>
          <!-- The first row is not used as names; the meta data entry for column 0 names it IDNum and lists integer as its type. -->
          <operator activated="true" class="read_csv" compatibility="5.3.015" expanded="true" height="60" name="Read CSV" width="90" x="112" y="165">
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="IDNum.true.integer.regular"/>
            </list>
          </operator>
          <!-- Loop over the examples read from the CSV; the subprocess extracts the macro and then runs the inner Loop. -->
          <operator activated="true" class="loop_examples" compatibility="5.3.015" expanded="true" height="112" name="Loop Examples" width="90" x="246" y="165">
            <process expanded="true">
              <!-- Store the data value of attribute IDNum at index %{example} in a macro that is also called IDNum. -->
              <operator activated="true" class="extract_macro" compatibility="5.3.015" expanded="true" height="60" name="Extract Macro" width="90" x="45" y="30">
                <parameter key="macro" value="IDNum"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="attribute_name" value="IDNum"/>
                <parameter key="example_index" value="%{example}"/>
                <list key="additional_macros"/>
              </operator>
              <!-- Inner Loop: has no connected input; it generates its own data and returns two example sets. -->
              <operator activated="true" class="loop" compatibility="5.3.015" expanded="true" height="94" name="Loop" width="90" x="179" y="30">
                <process expanded="true">
                  <!-- Placeholder data with a single attribute, DeliberatelyNull, whose value expression is 1. -->
                  <operator activated="true" class="generate_data_user_specification" compatibility="5.3.015" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="45" y="30">
                    <list key="attribute_values">
                      <parameter key="DeliberatelyNull" value="1"/>
                    </list>
                    <list key="set_additional_roles"/>
                  </operator>
                  <!-- Multiply feeds the same data to both branches (output 1 to the filter, output 2 straight to IntegerBecomesReal). -->
                  <operator activated="true" class="multiply" compatibility="5.3.015" expanded="true" height="94" name="Multiply" width="90" x="45" y="120"/>
                  <!-- Inverted range filter on examples 1 to 1; presumably this leaves no examples, which is the "no data" case from the post. TODO confirm. -->
                  <operator activated="true" class="filter_example_range" compatibility="5.3.015" expanded="true" height="76" name="Filter Example Range" width="90" x="246" y="165">
                    <parameter key="first_example" value="1"/>
                    <parameter key="last_example" value="1"/>
                    <parameter key="invert_filter" value="true"/>
                  </operator>
                  <!-- Filtered branch: create attribute IDNum from the macro (observed by the poster as Nominal). -->
                  <operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="IntegerBecomesNominal" width="90" x="380" y="165">
                    <list key="function_descriptions">
                      <parameter key="IDNum" value="%{IDNum}"/>
                    </list>
                  </operator>
                  <!-- Fix for the filtered branch: run Parse Numbers on the single attribute IDNum, then Real to Integer. -->
                  <operator activated="true" class="parse_numbers" compatibility="5.3.015" expanded="true" height="76" name="Parse Numbers" width="90" x="514" y="165">
                    <parameter key="attribute_filter_type" value="single"/>
                    <parameter key="attribute" value="IDNum"/>
                  </operator>
                  <!-- No parameters are listed for Real to Integer, so whatever defaults the operator has apply. -->
                  <operator activated="true" class="real_to_integer" compatibility="5.3.015" expanded="true" height="76" name="Real to Integer" width="90" x="648" y="120"/>
                  <!-- Unfiltered branch: same expression (observed by the poster as Real). -->
                  <operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="IntegerBecomesReal" width="90" x="380" y="30">
                    <list key="function_descriptions">
                      <parameter key="IDNum" value="%{IDNum}"/>
                    </list>
                  </operator>
                  <!-- Same fix on the unfiltered branch so both outputs go through identical conversion steps. -->
                  <operator activated="true" class="parse_numbers" compatibility="5.3.015" expanded="true" height="76" name="Parse Numbers (2)" width="90" x="514" y="30">
                    <parameter key="attribute_filter_type" value="single"/>
                    <parameter key="attribute" value="IDNum"/>
                  </operator>
                  <operator activated="true" class="real_to_integer" compatibility="5.3.015" expanded="true" height="76" name="Real to Integer (2)" width="90" x="648" y="30"/>
                  <connect from_op="Generate Data by User Specification" from_port="output" to_op="Multiply" to_port="input"/>
                  <connect from_op="Multiply" from_port="output 1" to_op="Filter Example Range" to_port="example set input"/>
                  <connect from_op="Multiply" from_port="output 2" to_op="IntegerBecomesReal" to_port="example set input"/>
                  <connect from_op="Filter Example Range" from_port="example set output" to_op="IntegerBecomesNominal" to_port="example set input"/>
                  <connect from_op="IntegerBecomesNominal" from_port="example set output" to_op="Parse Numbers" to_port="example set input"/>
                  <connect from_op="Parse Numbers" from_port="example set output" to_op="Real to Integer" to_port="example set input"/>
                  <connect from_op="Real to Integer" from_port="example set output" to_port="output 2"/>
                  <connect from_op="IntegerBecomesReal" from_port="example set output" to_op="Parse Numbers (2)" to_port="example set input"/>
                  <connect from_op="Parse Numbers (2)" from_port="example set output" to_op="Real to Integer (2)" to_port="example set input"/>
                  <connect from_op="Real to Integer (2)" from_port="example set output" to_port="output 1"/>
                  <portSpacing port="source_input 1" spacing="0"/>
                  <portSpacing port="sink_output 1" spacing="0"/>
                  <portSpacing port="sink_output 2" spacing="0"/>
                  <portSpacing port="sink_output 3" spacing="0"/>
                </process>
              </operator>
              <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Loop" from_port="output 1" to_port="output 1"/>
              <connect from_op="Loop" from_port="output 2" to_port="output 2"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
              <portSpacing port="sink_output 3" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Create Document" from_port="output" to_op="Write Document" to_port="document"/>
          <connect from_op="Write Document" from_port="file" to_op="Read CSV" to_port="file"/>
          <connect from_op="Read CSV" from_port="output" to_op="Loop Examples" to_port="example set"/>
          <!-- Results: 1 = the looped example set, 2 = the unfiltered branch after conversion, 3 = the filtered branch after conversion. -->
          <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
          <connect from_op="Loop Examples" from_port="output 1" to_port="result 2"/>
          <connect from_op="Loop Examples" from_port="output 2" to_port="result 3"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
          <portSpacing port="sink_result 4" spacing="0"/>
        </process>
      </operator>
    </process>
    Thanks anyway. 
    JEdward.
Sign In or Register to comment.