RapidMiner

Filtering with a looped parameter

SOLVED
Contributor

Filtering with a looped parameter

Hi All,

 

I have a data set with dates ranging from the beginning to the end of 2016. I am creating a forecasting model and I want to iterate through the data set multiple times, the first time removing the last 6 months, the second time removing the last 5 months, then removing the last 4 months etc.

 

I am currently trying to do this using the Loop Parameters operator and adjusting the filter; however, either I cannot get the syntax right or I need a different approach.

 

Could someone please tell me the exact options to select in the Loop Parameters operator, or suggest a better method?

 

Thank you in advance

4 REPLIES
FBT
Super Contributor

Re: Filtering with a looped parameter

I may not fully understand the exact thing you would like to achieve, but I would probably take a look at the Macro operators (Set Macro or Extract Macro) and use the defined macros as loop parameters. If your data is not sensitive, or, if you can make up fake data with the same structure and post it here, I can take a closer look.  

Highlighted
Moderator

Re: Filtering with a looped parameter

Dear Edinsda,

 

Do you want to do this for validation purposes? In this case Sliding Window Validation might be the right operator.

 

Otherwise, you can do it with a combination of Loop Parameters and Filter Example Range (or Filter Examples). Attached is a process which shows it.

 

Best,

Martin

<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
  <!-- Example process: generate data, store a value in the macro numberOfExamples,
       then let Loop Parameters vary the first row of a range filter and train a
       Generalized Linear Model on each filtered range. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.003" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="generate_data" compatibility="7.5.003" expanded="true" height="68" name="Generate Data" width="90" x="45" y="34"/>
      <!-- Only the macro name is set here; what is extracted depends on the operator's
           default macro type (presumably the example count, going by the name; TODO confirm). -->
      <operator activated="true" class="extract_macro" compatibility="7.5.003" expanded="true" height="68" name="Extract Macro" width="90" x="380" y="34">
        <parameter key="macro" value="numberOfExamples"/>
        <list key="additional_macros"/>
      </operator>
      <operator activated="true" class="loop_parameters" compatibility="7.5.003" expanded="true" height="82" name="Loop Parameters" width="90" x="514" y="34">
        <!-- Varies first_example of the inner operator named "Filter Example Range".
             The value looks like a grid spec [min;max;steps;scale]; TODO confirm
             against the Loop Parameters documentation. -->
        <list key="parameters">
          <parameter key="Filter Example Range.first_example" value="[1.0;50;3;linear]"/>
        </list>
        <process expanded="true">
          <!-- first_example is the parameter targeted by the loop above, so the literal 50
               is presumably overridden on each iteration (TODO confirm).
               last_example reads the macro set by Extract Macro. -->
          <operator activated="true" class="filter_example_range" compatibility="7.5.003" expanded="true" height="82" name="Filter Example Range" width="90" x="112" y="34">
            <parameter key="first_example" value="50"/>
            <parameter key="last_example" value="%{numberOfExamples}"/>
          </operator>
          <operator activated="true" class="h2o:generalized_linear_model" compatibility="7.5.000" expanded="true" height="103" name="Generalized Linear Model" width="90" x="782" y="34">
            <list key="beta_constraints"/>
            <list key="expert_parameters"/>
          </operator>
          <!-- Wiring inside the loop: loop input to the filter, filtered set to the GLM
               training port, GLM model to the loop's first result port. -->
          <connect from_port="input 1" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Generalized Linear Model" to_port="training set"/>
          <connect from_op="Generalized Linear Model" from_port="model" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: Generate Data, then Extract Macro, then Loop Parameters, then process result. -->
      <connect from_op="Generate Data" from_port="output" to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Extract Macro" from_port="example set" to_op="Loop Parameters" to_port="input 1"/>
      <connect from_op="Loop Parameters" from_port="result 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
--------------------------------------------------------------------------
Head of Data Science Services at RapidMiner
Contributor

Re: Filtering with a looped parameter

Thank you both for your response.

 

Here is an example XML for what I am trying to achieve. The filter value is currently set to 1; I want to loop through setting it to 1, 2, 3, 4, 5, 6, 7, 8 and 9.

 

<?xml version="1.0" encoding="UTF-8"?><process version="7.5.001">
  <!-- Question example: the goal described in the post is to vary the threshold in the
       Filter Examples condition (att1.gt.1) over the values 1 to 9. The process is
       incomplete as posted; see the review notes below. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="generate_data" compatibility="7.5.001" expanded="true" height="68" name="Generate Data" width="90" x="45" y="34"/>
      <operator activated="true" class="extract_macro" compatibility="7.5.001" expanded="true" height="68" name="Extract Macro" width="90" x="246" y="34">
        <parameter key="macro" value="numberOfExamples"/>
        <list key="additional_macros"/>
      </operator>
      <operator activated="true" class="loop_parameters" compatibility="7.5.001" expanded="true" height="82" name="Loop Parameters" width="90" x="380" y="34">
        <!-- NOTE(review): this key targets an operator named "Filter Example Range", but no
             operator with that name exists in this process (the inner filter is named
             "Filter Examples"). -->
        <list key="parameters">
          <parameter key="Filter Example Range.first_example" value="[1.0;50;3;linear]"/>
        </list>
        <process expanded="true">
          <!-- NOTE(review): the GLM is listed before the filter and has no training set
               connection in this subprocess. -->
          <operator activated="true" class="h2o:generalized_linear_model" compatibility="7.5.000" expanded="true" height="103" name="Generalized Linear Model" width="90" x="782" y="34">
            <list key="beta_constraints"/>
            <list key="expert_parameters"/>
          </operator>
          <operator activated="true" class="filter_examples" compatibility="7.5.001" expanded="true" height="103" name="Filter Examples" width="90" x="246" y="85">
            <!-- Fixed condition att1.gt.1; the trailing 1 is the value the post wants to vary. -->
            <list key="filters_list">
              <parameter key="filters_entry_key" value="att1.gt.1"/>
            </list>
          </operator>
          <!-- NOTE(review): only the loop input to Filter Examples and the GLM model to
               result 1 are connected; the filter output is not wired to the GLM. -->
          <connect from_port="input 1" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Generalized Linear Model" from_port="model" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: Generate Data, then Extract Macro, then Loop Parameters, then process result. -->
      <connect from_op="Generate Data" from_port="output" to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Extract Macro" from_port="example set" to_op="Loop Parameters" to_port="input 1"/>
      <connect from_op="Loop Parameters" from_port="result 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

The biggest issue is that I don't know what to select in loop parameters, see the image below.

 

Capture.PNG

 

I can't get the parameter string or parameter expression to work and I don't know what the others do.

 

Thanks again

Moderator

Re: Filtering with a looped parameter

Dear Edindsa,

 

I would propose doing this with a standard Loop operator. It gives you a macro (i.e. a process variable) holding the current iteration number. We can use this to generate our own macro with the filter value and use it in the filter. The expressions we use for this can be very sophisticated.

An example is attached.

 

Best,

Martin

<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
  <!-- Example answer: a Loop operator whose inner Generate Macro defines filterValue from
       the iteration macro; Filter Examples (2) then uses that macro as its threshold
       before a Generalized Linear Model is trained on the filtered set. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.003" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="generate_data" compatibility="7.5.003" expanded="true" height="68" name="Generate Data" width="90" x="45" y="34"/>
      <operator activated="true" class="extract_macro" compatibility="7.5.003" expanded="true" height="68" name="Extract Macro" width="90" x="246" y="34">
        <parameter key="macro" value="numberOfExamples"/>
        <list key="additional_macros"/>
      </operator>
      <!-- NOTE(review): no iteration count is set on this Loop, so the operator's default
           applies; it likely needs to be set explicitly to cover the thresholds 1 to 9
           asked for in the thread (TODO confirm parameter name and default in the Loop docs). -->
      <operator activated="true" class="concurrency:loop" compatibility="7.5.003" expanded="true" height="82" name="Loop" width="90" x="514" y="34">
        <process expanded="true">
          <!-- Defines the macro filterValue from the expression eval(%{iteration});
               %{iteration} is presumably the Loop operator's iteration macro (TODO confirm). -->
          <operator activated="true" class="generate_macro" compatibility="7.5.003" expanded="true" height="82" name="Generate Macro (2)" width="90" x="179" y="34">
            <list key="function_descriptions">
              <parameter key="filterValue" value="eval(%{iteration})"/>
            </list>
            <description align="center" color="transparent" colored="false" width="126">We can do more complex things here</description>
          </operator>
          <operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples (2)" width="90" x="447" y="34">
            <!-- The condition uses %{filterValue}, so the threshold follows the macro defined above. -->
            <list key="filters_list">
              <parameter key="filters_entry_key" value="att1.gt.%{filterValue}"/>
            </list>
          </operator>
          <operator activated="true" class="h2o:generalized_linear_model" compatibility="7.5.000" expanded="true" height="103" name="Generalized Linear Model (2)" width="90" x="648" y="34">
            <list key="beta_constraints"/>
            <list key="expert_parameters"/>
          </operator>
          <!-- Wiring inside the loop: loop input through Generate Macro (2) to the filter,
               filtered set to the GLM training port, GLM model to the loop's first output. -->
          <connect from_port="input 1" to_op="Generate Macro (2)" to_port="through 1"/>
          <connect from_op="Generate Macro (2)" from_port="through 1" to_op="Filter Examples (2)" to_port="example set input"/>
          <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Generalized Linear Model (2)" to_port="training set"/>
          <connect from_op="Generalized Linear Model (2)" from_port="model" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Deactivated (activated="false") Loop Parameters variant; it is not referenced by
           any top-level connection below. Its parameter key also targets
           "Filter Example Range", which is not among its inner operators. -->
      <operator activated="false" class="loop_parameters" compatibility="7.5.003" expanded="true" height="82" name="Loop Parameters" width="90" x="514" y="289">
        <list key="parameters">
          <parameter key="Filter Example Range.first_example" value="[1.0;50;3;linear]"/>
        </list>
        <process expanded="true">
          <operator activated="true" class="generate_macro" compatibility="7.5.003" expanded="true" height="82" name="Generate Macro" width="90" x="45" y="34">
            <list key="function_descriptions"/>
          </operator>
          <operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="34">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="att1.gt.1"/>
            </list>
          </operator>
          <operator activated="true" class="h2o:generalized_linear_model" compatibility="7.5.000" expanded="true" height="103" name="Generalized Linear Model" width="90" x="782" y="34">
            <list key="beta_constraints"/>
            <list key="expert_parameters"/>
          </operator>
          <connect from_port="input 1" to_op="Generate Macro" to_port="through 1"/>
          <connect from_op="Generate Macro" from_port="through 1" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Generalized Linear Model" to_port="training set"/>
          <connect from_op="Generalized Linear Model" from_port="model" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: Generate Data, then Extract Macro, then Loop, then process result. -->
      <connect from_op="Generate Data" from_port="output" to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Extract Macro" from_port="example set" to_op="Loop" to_port="input 1"/>
      <connect from_op="Loop" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
--------------------------------------------------------------------------
Head of Data Science Services at RapidMiner