Options

[SOLVED] Replace Missing Values effects can propagate backwards?

tennenrishintennenrishin Member Posts: 177 Contributor II
edited June 2019 in Help
In this process, the Replace Missing Values operator affects even the output that never passed through it, if its "create view" parameter is unchecked. Is this the intended (and even default) behavior?
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process (version 5.2.008) posted to reproduce the question:
     Replace Missing Values is used without create_view, and the poster reports
     that result 1 is affected even though it never passes through that operator. -->
<process version="5.2.008">
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
   <process expanded="true" height="654" width="1015">
     <!-- First generated example set: attributes a and b only. -->
     <operator activated="true" class="generate_data_user_specification" compatibility="5.2.008" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="179" y="30">
       <list key="attribute_values">
         <parameter key="a" value="1"/>
         <parameter key="b" value="2"/>
       </list>
       <list key="set_additional_roles"/>
     </operator>
     <!-- Second generated example set: a and b again, plus z (set to "hi") and x. -->
     <operator activated="true" class="generate_data_user_specification" compatibility="5.2.008" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="179" y="120">
       <list key="attribute_values">
         <parameter key="a" value="1"/>
         <parameter key="b" value="2"/>
         <parameter key="z" value="&quot;hi&quot;"/>
         <parameter key="x" value="1"/>
       </list>
       <list key="set_additional_roles"/>
     </operator>
     <!-- Combines both generated example sets.
          NOTE(review): presumably the rows from the first set get missing values for z and x,
          which is what Replace Missing Values later acts on; confirm in the result view. -->
     <operator activated="true" class="union" compatibility="5.2.008" expanded="true" height="76" name="Union" width="90" x="380" y="30"/>
     <!-- Adds attribute c defined by the expression a+b. -->
     <operator activated="true" class="generate_attributes" compatibility="5.2.008" expanded="true" height="76" name="Generate Attributes" width="90" x="648" y="30">
       <list key="function_descriptions">
         <parameter key="c" value="a+b"/>
       </list>
     </operator>
     <!-- Filters by value type nominal and uses the fixed replenishment value ???MISSING???.
          No create_view parameter is set on this operator. -->
     <operator activated="true" class="replace_missing_values" compatibility="5.2.008" expanded="true" height="94" name="Replace Missing Values" width="90" x="648" y="120">
       <parameter key="attribute_filter_type" value="value_type"/>
       <parameter key="value_type" value="nominal"/>
       <parameter key="default" value="value"/>
       <list key="columns"/>
       <parameter key="replenishment_value" value="???MISSING???"/>
     </operator>
     <!-- Wiring: both generators feed Union, and Union feeds Generate Attributes. -->
     <connect from_op="Generate Data by User Specification" from_port="output" to_op="Union" to_port="example set 1"/>
     <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Union" to_port="example set 2"/>
     <connect from_op="Union" from_port="union" to_op="Generate Attributes" to_port="example set input"/>
     <!-- result 1 takes the output of Generate Attributes directly. -->
     <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
     <!-- result 2 takes the "original" port of Generate Attributes after Replace Missing Values. -->
     <connect from_op="Generate Attributes" from_port="original" to_op="Replace Missing Values" to_port="example set input"/>
     <connect from_op="Replace Missing Values" from_port="example set output" to_port="result 2"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="72"/>
     <portSpacing port="sink_result 3" spacing="0"/>
   </process>
 </operator>
</process>
Tagged:

Answers

  • Options
    Nils_WoehlerNils_Woehler Member Posts: 463 Maven
    Hi,

    If you do not check "Create view", the missing value replacement works directly on the data table that both of your example sets are based on.
    It replaces all missing values for every example in that example table. Because both of your example sets are based on the same example table,
    it looks as if the changes are being propagated backwards. If you want to avoid this, check "Create view" and materialize the data afterwards.
    This will create a separate example table for the second example set. But be aware that this will also increase RapidMiner's memory use!

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!-- RapidMiner process (version 5.2.008) posted as the suggested fix:
         Replace Missing Values runs with create_view set to true and its output
         is passed through Materialize Data before reaching result 2. -->
    <process version="5.2.008">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
        <process expanded="true" height="654" width="1015">
          <!-- First generated example set: attributes a and b only. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="5.2.008" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="179" y="30">
            <list key="attribute_values">
              <parameter key="a" value="1"/>
              <parameter key="b" value="2"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Second generated example set: a and b again, plus z (set to "hi") and x. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="5.2.008" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="179" y="120">
            <list key="attribute_values">
              <parameter key="a" value="1"/>
              <parameter key="b" value="2"/>
              <parameter key="z" value="&quot;hi&quot;"/>
              <parameter key="x" value="1"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Combines both generated example sets. -->
          <operator activated="true" class="union" compatibility="5.2.008" expanded="true" height="76" name="Union" width="90" x="380" y="30"/>
          <!-- Adds attribute c defined by the expression a+b.
               NOTE(review): breakpoints="before" is set on this operator, so execution presumably
               pauses before it runs; confirm whether this is intentional or a debugging leftover. -->
          <operator activated="true" breakpoints="before" class="generate_attributes" compatibility="5.2.008" expanded="true" height="76" name="Generate Attributes" width="90" x="648" y="30">
            <list key="function_descriptions">
              <parameter key="c" value="a+b"/>
            </list>
          </operator>
          <!-- Same nominal-only replacement with ???MISSING??? as in the question,
               but with create_view enabled as recommended in the answer. -->
          <operator activated="true" class="replace_missing_values" compatibility="5.2.008" expanded="true" height="94" name="Replace Missing Values" width="90" x="648" y="120">
            <parameter key="create_view" value="true"/>
            <parameter key="attribute_filter_type" value="value_type"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="default" value="value"/>
            <list key="columns"/>
            <parameter key="replenishment_value" value="???MISSING???"/>
          </operator>
          <!-- Materializes the view; per the answer this gives the second example set
               its own example table, at the cost of additional memory. -->
          <operator activated="true" class="materialize_data" compatibility="5.2.008" expanded="true" height="76" name="Materialize Data" width="90" x="782" y="120"/>
          <!-- Wiring: both generators feed Union, and Union feeds Generate Attributes. -->
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Union" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Union" to_port="example set 2"/>
          <connect from_op="Union" from_port="union" to_op="Generate Attributes" to_port="example set input"/>
          <!-- result 1 takes the output of Generate Attributes directly. -->
          <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
          <!-- result 2 takes the "original" port after Replace Missing Values and Materialize Data. -->
          <connect from_op="Generate Attributes" from_port="original" to_op="Replace Missing Values" to_port="example set input"/>
          <connect from_op="Replace Missing Values" from_port="example set output" to_op="Materialize Data" to_port="example set input"/>
          <connect from_op="Materialize Data" from_port="example set output" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="72"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
    Best,
    Nils
Sign In or Register to comment.