RapidMiner

Is is possible to loop a collection of models and obtain a collection of predictions?

Elite II

Is is possible to loop a collection of models and obtain a collection of predictions?

I'm trying to implement Blagging as described by Tom Fawcett in "Learning from Imbalanced Classes".

 

I summarized the algorithm:

 

1) Obtain bootstrap samples from the original imbalanced data set

2) Balance each sample by downsampling 

3) Estimate a model (e.g. tree)

4) Put the individual trees to vote

 

I'm trying to implement the algorithm in Rapidminer. I'm using collections. I can create a collection of models successfully, but when I try to loop this collection to produce another collection of prediction this time I run into trouble. Am I violating a rule about working with collections?

 

I'm using the abalone data set as an example. I've attached the files.

 

Any help will be appreciated.

 

<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!--
    Blagging (balanced bagging) sketch:
      1. "Loop" runs 3 iterations; each draws a bootstrap sample of the training
         data, downsamples the negative class to the size of the positive class,
         and trains a decision tree on the balanced sample.
      2. "Collect" gathers the trained trees into one flat collection.
      3. "Loop Collection" applies each tree to the test data and keeps only the
         Id and confidence(positive) columns, yielding one example set per tree.
    Combining the per-tree confidences into a vote is not part of this process.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.003" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.5.003" expanded="true" height="68" name="Retrieve training" width="90" x="45" y="187">
        <parameter key="repository_entry" value="training"/>
      </operator>
      <operator activated="true" class="concurrency:loop" compatibility="7.5.003" expanded="true" height="82" name="Loop" width="90" x="246" y="187">
        <parameter key="number_of_iterations" value="3"/>
        <process expanded="true">
          <operator activated="true" class="sample_bootstrapping" compatibility="7.5.003" expanded="true" height="82" name="Sample (Bootstrapping)" width="90" x="112" y="34"/>
          <!-- Positive (minority) examples of the bootstrap sample. -->
          <operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="187">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="Class.equals.positive"/>
            </list>
          </operator>
          <!-- Stores a value taken from the positive subset in macro "numfraud";
               it is used below as the sample size for the negative class. -->
          <operator activated="true" class="extract_macro" compatibility="7.5.003" expanded="true" height="68" name="Extract Macro" width="90" x="581" y="34">
            <parameter key="macro" value="numfraud"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Negative (majority) examples, taken from the unfiltered "original" port. -->
          <operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples (2)" width="90" x="380" y="238">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="Class.equals.negative"/>
            </list>
          </operator>
          <!-- Downsample the negatives to %{numfraud} examples. -->
          <operator activated="true" class="sample" compatibility="7.5.003" expanded="true" height="82" name="Sample" width="90" x="581" y="238">
            <parameter key="sample_size" value="%{numfraud}"/>
            <list key="sample_size_per_class"/>
            <list key="sample_ratio_per_class"/>
            <list key="sample_probability_per_class"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.5.003" expanded="true" height="103" name="Append" width="90" x="715" y="136"/>
          <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="7.5.003" expanded="true" height="82" name="Decision Tree" width="90" x="849" y="85">
            <parameter key="criterion" value="gini_index"/>
            <parameter key="maximal_depth" value="5"/>
            <parameter key="apply_prepruning" value="false"/>
          </operator>
          <connect from_port="input 1" to_op="Sample (Bootstrapping)" to_port="example set input"/>
          <connect from_op="Sample (Bootstrapping)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Filter Examples" from_port="original" to_op="Filter Examples (2)" to_port="example set input"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Append" to_port="example set 1"/>
          <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Sample" to_port="example set input"/>
          <connect from_op="Sample" from_port="example set output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Append" from_port="merged set" to_op="Decision Tree" to_port="training set"/>
          <connect from_op="Decision Tree" from_port="model" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- unfold=true flattens the input: if "Loop" already delivers a collection
           of models, the result is still one flat collection of models rather
           than a collection nested inside another collection. -->
      <operator activated="true" class="collect" compatibility="7.5.003" expanded="true" height="82" name="Collect" width="90" x="380" y="187">
        <parameter key="unfold" value="true"/>
      </operator>
      <operator activated="true" class="loop_collection" compatibility="7.5.003" expanded="true" height="82" name="Loop Collection" width="90" x="581" y="187">
        <process expanded="true">
          <!-- The "single" port hands over ONE element of the collection per
               iteration (here: one model). It is wired directly into Apply Model;
               a Select operator is not needed and would fail, because it expects
               a collection as input, not a single model. -->
          <operator activated="true" class="retrieve" compatibility="7.5.003" expanded="true" height="68" name="Retrieve test" width="90" x="112" y="187">
            <parameter key="repository_entry" value="test"/>
          </operator>
          <operator activated="true" class="apply_model" compatibility="7.5.003" expanded="true" height="82" name="Apply Model" width="90" x="246" y="136">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="7.5.003" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="136">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="confidence(positive)|Id"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <connect from_port="single" to_op="Apply Model" to_port="model"/>
          <connect from_op="Retrieve test" from_port="output" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_single" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Retrieve training" from_port="output" to_op="Loop" to_port="input 1"/>
      <connect from_op="Loop" from_port="output 1" to_op="Collect" to_port="input 1"/>
      <connect from_op="Collect" from_port="collection" to_op="Loop Collection" to_port="collection"/>
      <connect from_op="Loop Collection" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Attachments

3 REPLIES
Highlighted
Elite II

Re: Is is possible to loop a collection of models and obtain a collection of predictions?

Forgot to attach the training set.

Attachments

RMStaff

Re: Is is possible to loop a collection of models and obtain a collection of predictions?

Hey,

 

cant you just use a bagging operator and balance the classes inside (e.g. with Generate Weight (Stratification))?

 

Best,

Martin

--------------------------------------------------------------------------
Head of Data Science Services at RapidMiner
Elite III

Re: Is is possible to loop a collection of models and obtain a collection of predictions?

Alternatively when you create the original models, don't store them in a collection, but rather as separate models in the repository.  Then you can simply use the "Vote" ensemble operator or similar to get your final prediction.

 

Brian T., Lindon Ventures - www.lindonventures.com
Analytics Consulting by Certified RapidMiner Analysts