RapidMiner

append different aggregating results of columns into one table?

Guru
Guru

append different aggregating results of columns into one table?

hi,

I want to calculate several statistics (average, median, max, min, etc. — those that are not shown in the metadata results) for the data in one table.

I use several Aggregate operators on that dataset to do this, and then append the results. However, the Append operator complains that the column names do not match, even though the number of columns does. I just want to append the results regardless of what the columns are called. Is that possible somehow?

 

<?xml version="1.0" encoding="UTF-8"?><process version="7.2.003">
  <!-- Process from the question: retrieves one data set, fans it out through
       Multiply to five Aggregate operators (each with use_default_aggregation
       enabled), and feeds all five results into a single Append operator. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.2.003" expanded="true" height="68" name="data" width="90" x="179" y="34">
        <parameter key="repository_entry" value="//Cloud Repository/Gefuegedaten"/>
      </operator>
      <operator activated="true" class="multiply" compatibility="7.2.003" expanded="true" height="166" name="Multiply" width="90" x="246" y="289"/>
      <!-- Median of every attribute (default aggregation, special attributes included). -->
      <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="Aggregate" width="90" x="380" y="136">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="default_aggregation_function" value="median"/>
        <list key="aggregation_attributes"/>
      </operator>
      <!-- Same configuration with the minimum function. -->
      <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="Aggregate (2)" width="90" x="380" y="238">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="default_aggregation_function" value="minimum"/>
        <list key="aggregation_attributes"/>
      </operator>
      <!-- Same configuration with the maximum function. -->
      <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="Aggregate (4)" width="90" x="380" y="34">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="default_aggregation_function" value="maximum"/>
        <list key="aggregation_attributes"/>
      </operator>
      <!-- No default_aggregation_function is set here, so the operator's
           built-in default applies (presumably average; confirm). -->
      <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="Aggregate (3)" width="90" x="581" y="289">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <list key="aggregation_attributes"/>
      </operator>
      <!-- Same configuration with the standard_deviation function. -->
      <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="Aggregate (5)" width="90" x="648" y="391">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="default_aggregation_function" value="standard_deviation"/>
        <list key="aggregation_attributes"/>
      </operator>
      <!-- Per the accompanying post, this is the operator that reports
           mismatching column names for the five aggregation results. -->
      <operator activated="true" class="append" compatibility="7.2.003" expanded="true" height="166" name="Append" width="90" x="581" y="34">
        <parameter key="datamanagement" value="sparse_map"/>
      </operator>
      <!-- Wiring: data feeds Multiply; Multiply outputs 1 to 5 feed the five
           Aggregate operators; their outputs occupy Append ports 1 to 5;
           the merged set is delivered as result 1. -->
      <connect from_op="data" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Aggregate (2)" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 3" to_op="Aggregate (4)" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 4" to_op="Aggregate (3)" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 5" to_op="Aggregate (5)" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Append" to_port="example set 2"/>
      <connect from_op="Aggregate (2)" from_port="example set output" to_op="Append" to_port="example set 3"/>
      <connect from_op="Aggregate (4)" from_port="example set output" to_op="Append" to_port="example set 1"/>
      <connect from_op="Aggregate (3)" from_port="example set output" to_op="Append" to_port="example set 4"/>
      <connect from_op="Aggregate (5)" from_port="example set output" to_op="Append" to_port="example set 5"/>
      <connect from_op="Append" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
2 REPLIES
Highlighted
RM Certified Expert
RM Certified Expert

Re: append different aggregating results of columns into one table?

Hello

 

The Append operator requires the attribute names to be identical in all the example sets. In your case, the names will all be different. Perhaps you want to Join the attributes instead? Here's an example that might help...

 

<?xml version="1.0" encoding="UTF-8"?><process version="7.2.000">
  <!-- Alternative suggested in the reply: instead of Append, every aggregation
       result is passed through Generate ID and the five results are combined
       with a chain of four Join operators. Uses the Iris sample data. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.2.000" expanded="true" height="68" name="data" width="90" x="179" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Maximum of every attribute (default aggregation, special attributes included). -->
      <operator activated="true" class="aggregate" compatibility="7.2.000" expanded="true" height="82" name="Aggregate (4)" width="90" x="380" y="34">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="default_aggregation_function" value="maximum"/>
        <list key="aggregation_attributes"/>
      </operator>
      <!-- Same configuration with the median function. -->
      <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="Aggregate" width="90" x="380" y="136">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="default_aggregation_function" value="median"/>
        <list key="aggregation_attributes"/>
      </operator>
      <!-- Same configuration with the minimum function. -->
      <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="Aggregate (2)" width="90" x="380" y="238">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="default_aggregation_function" value="minimum"/>
        <list key="aggregation_attributes"/>
      </operator>
      <operator activated="true" class="generate_id" compatibility="7.2.000" expanded="true" height="82" name="Generate ID (3)" width="90" x="581" y="238"/>
      <operator activated="true" class="generate_id" compatibility="7.2.000" expanded="true" height="82" name="Generate ID" width="90" x="581" y="34"/>
      <!-- No default_aggregation_function is set here, so the operator's
           built-in default applies (presumably average; confirm). -->
      <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="Aggregate (3)" width="90" x="380" y="340">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <list key="aggregation_attributes"/>
      </operator>
      <operator activated="true" class="generate_id" compatibility="7.2.000" expanded="true" height="82" name="Generate ID (4)" width="90" x="581" y="340"/>
      <!-- Same configuration with the standard_deviation function. -->
      <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="Aggregate (5)" width="90" x="380" y="442">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="default_aggregation_function" value="standard_deviation"/>
        <list key="aggregation_attributes"/>
      </operator>
      <operator activated="true" class="generate_id" compatibility="7.2.000" expanded="true" height="82" name="Generate ID (5)" width="90" x="581" y="442"/>
      <operator activated="true" class="generate_id" compatibility="7.2.000" expanded="true" height="82" name="Generate ID (2)" width="90" x="581" y="136"/>
      <!-- key_attributes is left empty on every Join; presumably the join then
           matches on the id attribute produced by Generate ID (confirm against
           the Join operator documentation). -->
      <operator activated="true" class="join" compatibility="7.2.000" expanded="true" height="82" name="Join" width="90" x="782" y="85">
        <list key="key_attributes"/>
      </operator>
      <operator activated="true" class="join" compatibility="7.2.000" expanded="true" height="82" name="Join (2)" width="90" x="782" y="187">
        <list key="key_attributes"/>
      </operator>
      <operator activated="true" class="join" compatibility="7.2.000" expanded="true" height="82" name="Join (3)" width="90" x="782" y="289">
        <list key="key_attributes"/>
      </operator>
      <operator activated="true" class="join" compatibility="7.2.000" expanded="true" height="82" name="Join (4)" width="90" x="782" y="391">
        <list key="key_attributes"/>
      </operator>
      <!-- Wiring: the Aggregate operators are chained through their "original"
           ports (no Multiply operator is used); each "example set output" goes
           to its own Generate ID. The Joins are chained through their "left"
           ports, Join to Join (2) to Join (3) to Join (4), and the output of
           Join (4) is delivered as result 1. -->
      <connect from_op="data" from_port="output" to_op="Aggregate (4)" to_port="example set input"/>
      <connect from_op="Aggregate (4)" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
      <connect from_op="Aggregate (4)" from_port="original" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Generate ID (2)" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="original" to_op="Aggregate (2)" to_port="example set input"/>
      <connect from_op="Aggregate (2)" from_port="example set output" to_op="Generate ID (3)" to_port="example set input"/>
      <connect from_op="Aggregate (2)" from_port="original" to_op="Aggregate (3)" to_port="example set input"/>
      <connect from_op="Generate ID (3)" from_port="example set output" to_op="Join (2)" to_port="right"/>
      <connect from_op="Generate ID" from_port="example set output" to_op="Join" to_port="left"/>
      <connect from_op="Aggregate (3)" from_port="example set output" to_op="Generate ID (4)" to_port="example set input"/>
      <connect from_op="Aggregate (3)" from_port="original" to_op="Aggregate (5)" to_port="example set input"/>
      <connect from_op="Generate ID (4)" from_port="example set output" to_op="Join (3)" to_port="right"/>
      <connect from_op="Aggregate (5)" from_port="example set output" to_op="Generate ID (5)" to_port="example set input"/>
      <connect from_op="Generate ID (5)" from_port="example set output" to_op="Join (4)" to_port="right"/>
      <connect from_op="Generate ID (2)" from_port="example set output" to_op="Join" to_port="right"/>
      <connect from_op="Join" from_port="join" to_op="Join (2)" to_port="left"/>
      <connect from_op="Join (2)" from_port="join" to_op="Join (3)" to_port="left"/>
      <connect from_op="Join (3)" from_port="join" to_op="Join (4)" to_port="left"/>
      <connect from_op="Join (4)" from_port="join" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

 

regards

 

Andrew

RM Certified Expert
RM Certified Expert

Re: append different aggregating results of columns into one table?

You can also use the "Rename by Generic Names" operator on each result before appending, if you know the columns really are the same, and then simply rename them afterwards.

Brian T., Lindon Ventures - www.lindonventures.com
Analytics Consulting by Certified RapidMiner Analysts