RapidMiner

dman99
N/A

Combining Example Set Attributes

Looking for a way to combine attributes from a variable number of ExampleSets into a single ExampleSet.  Given that I'm running a loop to create each ExampleSet, I end up with an IOObjectCollection on the output.  I've tried adding a join within the loop, but it is limited to knowing the first ExampleSet before combining others.  Is there a method using loops or set operators to get to the following:

ExampleSet 1:
Row#  Test1

ExampleSet 2:
Row#  Test2

ExampleSet 3:
Row#  Test3

Resulting ExampleSet:
Row#  Test1  Test2  Test3



<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Demo process for the question: a document holding the names Test1, Test2 and
  Test3 is turned into data, split into one value per name and transposed, and
  a Loop Values operator then generates one ExampleSet per name. The loop
  output is delivered to result 1.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.11" expanded="true" name="Process">
    <process expanded="true" height="690" width="882">
      <!-- Source text: three names separated by CR LF line breaks. -->
      <operator activated="true" class="text:create_document" compatibility="5.0.2" expanded="true" height="60" name="Create Document" width="90" x="45" y="75">
        <parameter key="text" value="Test1&#13;&#10;Test2&#13;&#10;Test3"/>
      </operator>
      <!-- The text attribute name is kept identical to the attribute referenced by Split below. -->
      <operator activated="true" class="text:documents_to_data" compatibility="5.0.2" expanded="true" height="76" name="Documents to Data" width="90" x="179" y="75">
        <parameter key="text_attribute" value="DataSet"/>
      </operator>
      <!-- The pattern also consumes an optional CR, because the text above uses CR LF line breaks. -->
      <operator activated="true" class="split" compatibility="5.0.11" expanded="true" height="76" name="Split" width="90" x="313" y="75">
        <parameter key="attribute" value="DataSet"/>
        <parameter key="split_pattern" value="\r?\n"/>
      </operator>
      <operator activated="true" class="transpose" compatibility="5.0.11" expanded="true" height="76" name="Transpose" width="90" x="447" y="75"/>
      <!-- One iteration per value of att_1; the inner process reads the current value as %{loop_value}. -->
      <operator activated="true" class="loop_values" compatibility="5.0.11" expanded="true" height="76" name="Loop Values" width="90" x="581" y="75">
        <parameter key="attribute" value="att_1"/>
        <process expanded="true" height="708" width="922">
          <operator activated="true" class="generate_data" compatibility="5.0.11" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
            <parameter key="number_of_attributes" value="1"/>
          </operator>
          <!-- Name the single generated attribute after the current loop value. -->
          <operator activated="true" class="rename" compatibility="5.0.11" expanded="true" height="76" name="Rename" width="90" x="179" y="30">
            <parameter key="old_name" value="att1"/>
            <parameter key="new_name" value="%{loop_value}"/>
          </operator>
          <!-- Inverted single-attribute selection on "label" with keep_subset_only: intended to pass on everything except label. -->
          <operator activated="true" class="work_on_subset" compatibility="5.0.11" expanded="true" height="76" name="Work on Subset" width="90" x="313" y="30">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="label"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="keep_subset_only" value="true"/>
            <process expanded="true" height="708" width="922">
              <connect from_port="exampleSet" to_port="example set"/>
              <portSpacing port="source_exampleSet" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Generate Data" from_port="output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Work on Subset" to_port="example set"/>
          <connect from_op="Work on Subset" from_port="example set" to_port="out 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Create Document" from_port="output" to_op="Documents to Data" to_port="documents 1"/>
      <connect from_op="Documents to Data" from_port="example set" to_op="Split" to_port="example set input"/>
      <connect from_op="Split" from_port="example set output" to_op="Transpose" to_port="example set input"/>
      <connect from_op="Transpose" from_port="example set output" to_op="Loop Values" to_port="example set"/>
      <connect from_op="Loop Values" from_port="out 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>



Thanks for the help.  Dan.
2 REPLIES
Contributor II haddock
Contributor II

Re: Combining Example Set Attributes

Hi there dman,

I think that you'll need some unifying Id to match the rows up, and a pivot to split out the test values by result set, like this...

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Suggested solution: a loop generates three ExampleSets, each reduced to one
  attribute plus an id and a label that is overwritten with a per-iteration
  value. The sets are appended, then pivoted with id as the group attribute
  and label as the index attribute, and finally id is given the id role again.
-->
<process version="5.1.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.1.003" expanded="true" name="Process">
    <process expanded="true" height="400" width="701">
      <operator activated="true" class="loop" compatibility="5.1.003" expanded="true" height="76" name="Generate N Sets" width="90" x="112" y="120">
        <parameter key="iterations" value="3"/>
        <process expanded="true" height="418" width="710">
          <operator activated="true" class="generate_data" compatibility="5.1.003" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
            <parameter key="target_function" value="multi classification"/>
            <parameter key="number_of_attributes" value="4"/>
          </operator>
          <!-- Keep only att1 of the four generated attributes and give it a common name. -->
          <operator activated="true" class="select_attributes" compatibility="5.1.003" expanded="true" height="76" name="Just 1 Attribute" width="90" x="45" y="120">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="att1"/>
          </operator>
          <operator activated="true" class="rename" compatibility="5.1.003" expanded="true" height="76" name="Rename" width="90" x="179" y="120">
            <parameter key="old_name" value="att1"/>
            <parameter key="new_name" value="Result_set"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- The generated id is what lines the rows of the different sets up in the pivot. -->
          <operator activated="true" class="generate_id" compatibility="5.1.003" expanded="true" height="76" name="Generate ID" width="90" x="246" y="30">
            <parameter key="create_nominal_ids" value="true"/>
          </operator>
          <!-- No target_role is given for id here, so the operator default applies; label is set to regular. -->
          <operator activated="true" class="set_role" compatibility="5.1.003" expanded="true" height="76" name="Change label Role" width="90" x="380" y="30">
            <parameter key="name" value="id"/>
            <list key="set_additional_roles">
              <parameter key="label" value="regular"/>
            </list>
          </operator>
          <!--
            Overwrite every label value with %{a}. NOTE(review): %{a} is taken to be
            RapidMiner's predefined apply-count macro, giving each iteration its own
            value; confirm against the macro documentation.
            The pattern is ".+" rather than ".*": ".*" also matches the empty string
            left at the end of the value, so the replacement would be inserted twice.
          -->
          <operator activated="true" class="replace" compatibility="5.1.003" expanded="true" height="76" name="Set RSet" width="90" x="514" y="30">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="label"/>
            <parameter key="replace_what" value=".+"/>
            <parameter key="replace_by" value="%{a}"/>
          </operator>
          <connect from_op="Generate Data" from_port="output" to_op="Just 1 Attribute" to_port="example set input"/>
          <connect from_op="Just 1 Attribute" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_op="Change label Role" to_port="example set input"/>
          <connect from_op="Change label Role" from_port="example set output" to_op="Set RSet" to_port="example set input"/>
          <connect from_op="Set RSet" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Stack the per-iteration sets into one long ExampleSet. -->
      <operator activated="true" class="append" compatibility="5.1.003" expanded="true" height="76" name="Join 'em up" width="90" x="246" y="120"/>
      <!-- Pivot with id as group attribute and label (the per-iteration value) as index attribute. -->
      <operator activated="true" class="pivot" compatibility="5.1.003" expanded="true" height="76" name="As Columns" width="90" x="380" y="120">
        <parameter key="group_attribute" value="id"/>
        <parameter key="index_attribute" value="label"/>
        <parameter key="consider_weights" value="false"/>
      </operator>
      <!-- Give id the id role again on the pivoted result. -->
      <operator activated="true" class="set_role" compatibility="5.1.003" expanded="true" height="76" name="with Row IDs" width="90" x="514" y="120">
        <parameter key="name" value="id"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <connect from_op="Generate N Sets" from_port="output 1" to_op="Join 'em up" to_port="example set 1"/>
      <connect from_op="Join 'em up" from_port="merged set" to_op="As Columns" to_port="example set input"/>
      <connect from_op="As Columns" from_port="example set output" to_op="with Row IDs" to_port="example set input"/>
      <connect from_op="with Row IDs" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>


Which I hope is the sort of thing you were after. A point to note is the regex replacement, which appears to repeat; I am most reluctant to even mutter the dreaded '*ug' word, but...  Smiley Wink


Contributor II binsetyawan
Contributor II

Re: Combining Example Set Attributes

I have a problem like this and I don't know how to solve it yet.

Polls
How can RapidMiner increase participation in our new competitions?
Twitter Feed