RapidMiner 9.7 is Now Available

Lots of amazing new improvements including true version control! Learn more about what's new here.

CLICK HERE TO DOWNLOAD

Generate New Attribute

LeMarcLeMarc Member Posts: 72 Contributor II
edited July 26 in Help
Hello,

I want to generate an attribute [AttNew]. The values of the new attribute should be according to the following rule:
  • If ID xy exists only once
  • --> for ID xy, take the value of Att1 as the value of the new attribute [AttNew]
  • If there are several identical IDs,
  • --> take all the values of Att1 of the identical IDs
  • --> and include them as a new attribute value for the generated attribute [2]
The reason is that I want to remove duplicate (or more frequently repeated) IDs without losing any information.
Below is the picture
(1)

To
(2)


Is there an operator that does this? Or can anyone help me work out how to describe the function?


Thank you very much!

Best Answers

Answers

  • LeMarcLeMarc Member Posts: 72 Contributor II
    Thank you @Telcontar120 for the solution.

    I noticed that the aggregation function does not work if I select a subset of attributes. Is this because the attribute names offered by "Select Attributes" do not match the actual attribute names of the example set after pivoting?
    My workaround is simply to rename the attributes.

    Is there perhaps an easier solution? Or is there a reason why the attribute names don't match?

    Thanks!

    <?xml version="1.0" encoding="UTF-8"?><process version="9.7.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
        <!-- Settings of the root process operator: log verbosity "init", fixed
             random seed 2001, mail notification switched off ("never", empty
             address), and the SYSTEM encoding. -->
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Branch 1, step 1: loads the data set stored at the repository entry
               below. Its output is wired to the "Pivot" operator. Note that the
               operator name and the repository path end with a space. -->
          <operator activated="true" class="retrieve" compatibility="9.7.000" expanded="true" height="68" name="Retrieve combine " width="90" x="45" y="34">
            <parameter key="repository_entry" value="//Local Repository/Datensatz  V2/combine "/>
          </operator>
          <!-- Branch 1, step 2: pivots the data grouped by ID, using Att1 as the
               column grouping attribute and aggregating Att1 itself with the
               "concatenation" function. The default aggregation is disabled, so
               only Att1 is aggregated. Judging by the names referenced further
               down in this process, the resulting columns are called
               concatenation(Att1)_bus, concatenation(Att1)_car and
               concatenation(Att1)_truck (TODO confirm against the actual data). -->
          <operator activated="true" class="blending:pivot" compatibility="9.7.000" expanded="true" height="82" name="Pivot" width="90" x="179" y="34">
            <parameter key="group_by_attributes" value="ID"/>
            <parameter key="column_grouping_attribute" value="Att1"/>
            <list key="aggregation_attributes">
              <parameter key="Att1" value="concatenation"/>
            </list>
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="default_aggregation_function" value="first"/>
          </operator>
          <!-- Branch 1, step 3 (the workaround described in the post): gives the
               pivoted columns generic names built from the stem "att". The
               attribute filter selects the single attribute ID and then inverts
               the selection, so every attribute except ID is renamed; special
               attributes are not included. The following operator refers to the
               renamed columns as att1, att2 and att3. -->
          <operator activated="true" class="rename_by_generic_names" compatibility="9.7.000" expanded="true" height="82" name="Rename by Generic Names" width="90" x="313" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="ID"/>
            <!-- NOTE(review): "attributes" belongs to the "subset" filter type and is
                 presumably ignored while the filter type is "single"; confirm. -->
            <parameter key="attributes" value="concatenation(Att1)_bus|concatenation(Att1)_car|concatenation(Att1)_truck"/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="polynominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="generic_name_stem" value="att"/>
          </operator>
          <!-- Branch 1, step 4: generates the new attribute "Attnew" by applying
               the "concatenation" aggregation, with ";" as separator, to the
               attribute subset att1|att2|att3 (the generic names produced by the
               preceding rename step). keep_all is false and ignore_missings is
               false in this branch. -->
          <operator activated="true" class="generate_aggregation" compatibility="6.5.002" expanded="true" height="82" name="Concatenation by Renaming" origin="GENERATED_TUTORIAL" width="90" x="447" y="34">
            <parameter key="attribute_name" value="Attnew"/>
            <parameter key="attribute_filter_type" value="subset"/>
            <!-- NOTE(review): "attribute" and "regular_expression" still hold the
                 original pivot column names; they presumably have no effect while
                 the filter type is "subset". Confirm before relying on it. -->
            <parameter key="attribute" value="concatenation(Att1)_car"/>
            <parameter key="attributes" value="att1|att2|att3"/>
            <parameter key="regular_expression" value="concatenation(Att1)_busconcatenation(Att1)_carconcatenation(Att1)_truck"/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="polynominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="polynominal"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="true"/>
            <parameter key="aggregation_function" value="concatenation"/>
            <parameter key="concatenation_separator" value=";"/>
            <parameter key="keep_all" value="false"/>
            <parameter key="ignore_missings" value="false"/>
            <parameter key="ignore_missing_attributes" value="false"/>
          </operator>
          <!-- Branch 2, step 1: loads the same repository entry as the first
               branch. Its output is wired to "Pivot (2)". -->
          <operator activated="true" class="retrieve" compatibility="9.7.000" expanded="true" height="68" name="Retrieve combine  (2)" width="90" x="45" y="187">
            <parameter key="repository_entry" value="//Local Repository/Datensatz  V2/combine "/>
          </operator>
          <!-- Branch 2, step 2: same pivot configuration as the "Pivot" operator of
               the first branch: grouped by ID, Att1 as column grouping attribute,
               Att1 aggregated with "concatenation", default aggregation disabled. -->
          <operator activated="true" class="blending:pivot" compatibility="9.7.000" expanded="true" height="82" name="Pivot (2)" width="90" x="179" y="187">
            <parameter key="group_by_attributes" value="ID"/>
            <parameter key="column_grouping_attribute" value="Att1"/>
            <list key="aggregation_attributes">
              <parameter key="Att1" value="concatenation"/>
            </list>
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="default_aggregation_function" value="first"/>
          </operator>
          <!-- Branch 2, step 3: generates "Attnew" with the "concatenation"
               aggregation and ";" as separator, but selects the attribute subset
               by the pivot output names directly (no rename step in between).
               This is the variant the post reports as not working.
               NOTE(review): besides the attribute names, this operator also
               differs from "Concatenation by Renaming" in keep_all (true here) and
               ignore_missings (true here), so the two branches are not a
               like-for-like comparison. -->
          <operator activated="true" class="generate_aggregation" compatibility="6.5.002" expanded="true" height="82" name="Concatenation Normal" origin="GENERATED_TUTORIAL" width="90" x="313" y="187">
            <parameter key="attribute_name" value="Attnew"/>
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value="concatenation(Att1)_car"/>
            <!-- The subset is written as names separated by "|"; each name contains
                 parentheses. NOTE(review): whether these characters interfere with
                 the subset matching is not visible from this file; verify. -->
            <parameter key="attributes" value="concatenation(Att1)_bus|concatenation(Att1)_car|concatenation(Att1)_truck"/>
            <parameter key="regular_expression" value="concatenation(Att1)_busconcatenation(Att1)_carconcatenation(Att1)_truck"/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="polynominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="polynominal"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="true"/>
            <parameter key="aggregation_function" value="concatenation"/>
            <parameter key="concatenation_separator" value=";"/>
            <parameter key="keep_all" value="true"/>
            <parameter key="ignore_missings" value="true"/>
            <parameter key="ignore_missing_attributes" value="false"/>
          </operator>
          <!-- Wiring of branch 1: Retrieve, Pivot, Rename by Generic Names,
               Concatenation by Renaming; the final example set is delivered to
               process port "result 3". -->
          <connect from_op="Retrieve combine " from_port="output" to_op="Pivot" to_port="input"/>
          <connect from_op="Pivot" from_port="output" to_op="Rename by Generic Names" to_port="example set input"/>
          <connect from_op="Rename by Generic Names" from_port="example set output" to_op="Concatenation by Renaming" to_port="example set input"/>
          <connect from_op="Concatenation by Renaming" from_port="example set output" to_port="result 3"/>
          <!-- Wiring of branch 2: Retrieve (2), Pivot (2), Concatenation Normal;
               the final example set is delivered to process port "result 1".
               No connection targets "result 2". -->
          <connect from_op="Retrieve combine  (2)" from_port="output" to_op="Pivot (2)" to_port="input"/>
          <connect from_op="Pivot (2)" from_port="output" to_op="Concatenation Normal" to_port="example set input"/>
          <connect from_op="Concatenation Normal" from_port="example set output" to_port="result 1"/>
          <!-- Port spacing entries for the process input and result ports, all 0. -->
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
          <portSpacing port="sink_result 4" spacing="0"/>
        </process>
      </operator>
    </process>



  • LeMarcLeMarc Member Posts: 72 Contributor II
    Thank you for the hint!
Sign In or Register to comment.