Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you previously used to log in to the RapidMiner community. This will ensure that any previously created content is synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
Generate New Attribute
Hello,
I want to generate a new attribute [AttNew]. Its values should follow these rules:
- If ID xy exists only once:
  - take the value of Att1 for ID xy as the value of the new attribute [AttNew].
- If there are several identical IDs:
  - take all the values of Att1 of the identical IDs
  - and combine them into a single value of the generated attribute, as shown in (2).
The reason is that I want to remove duplicate (or repeated) IDs without losing any information.
Below is the picture
(1)
To
(2)
Is there an operator that does this? Or can anyone help me describe the function?
Thank you very much!
0
Best Answers
-
Telcontar120 RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,635 Unicorn
Yes, you can do this. Pivot first so that each ID is in its own row with one attribute per value, and then use the concatenation function to combine those attributes into a single new one.
5
-
Telcontar120 RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,635 Unicorn
Yes, it is because after pivoting the names of the attributes change based on the values. It is a bit more complicated, but you can avoid this by selecting the attributes with a regular expression that captures any created attribute with the same prefix (which comes from the original attribute name before pivoting). This is better than the explicit subset selection, since it will also adapt in the future if you get any new attribute values.
5
Answers
<!-- Process context: no input, output or macro entries are declared here. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<!--
  Root operator of the process. It holds two parallel chains that both read the
  same repository entry, pivot it by ID and then build a single attribute
  "Attnew" by concatenating the pivoted columns:
    chain 1 (result 3): Retrieve, Pivot, Rename by Generic Names, Generate Aggregation
    chain 2 (result 1): Retrieve, Pivot, Generate Aggregation
  Both Generate Aggregation operators select their input columns with a regular
  expression instead of a hard-coded subset, so the process keeps working when
  Att1 gains or loses distinct values.
-->
<operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
  <parameter key="logverbosity" value="init"/>
  <parameter key="random_seed" value="2001"/>
  <parameter key="send_mail" value="never"/>
  <parameter key="notification_email" value=""/>
  <parameter key="process_duration_for_mail" value="30"/>
  <parameter key="encoding" value="SYSTEM"/>
  <process expanded="true">

    <!-- Chain 1: pivot, rename the pivoted columns generically, then concatenate. -->
    <!-- The trailing space in the operator name and repository path is part of the stored values. -->
    <operator activated="true" class="retrieve" compatibility="9.7.000" expanded="true" height="68" name="Retrieve combine " width="90" x="45" y="34">
      <parameter key="repository_entry" value="//Local Repository/Datensatz V2/combine "/>
    </operator>

    <!-- One row per ID; Att1 is both the column-grouping attribute and the aggregated attribute. -->
    <operator activated="true" class="blending:pivot" compatibility="9.7.000" expanded="true" height="82" name="Pivot" width="90" x="179" y="34">
      <parameter key="group_by_attributes" value="ID"/>
      <parameter key="column_grouping_attribute" value="Att1"/>
      <list key="aggregation_attributes">
        <parameter key="Att1" value="concatenation"/>
      </list>
      <parameter key="use_default_aggregation" value="false"/>
      <parameter key="default_aggregation_function" value="first"/>
    </operator>

    <!-- Filter is "single attribute = ID" with invert_selection=true, i.e. every attribute except ID
         is selected for renaming with the stem "att". -->
    <operator activated="true" class="rename_by_generic_names" compatibility="9.7.000" expanded="true" height="82" name="Rename by Generic Names" width="90" x="313" y="34">
      <parameter key="attribute_filter_type" value="single"/>
      <parameter key="attribute" value="ID"/>
      <parameter key="attributes" value="concatenation(Att1)_bus|concatenation(Att1)_car|concatenation(Att1)_truck"/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="polynominal"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="time"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="true"/>
      <parameter key="include_special_attributes" value="false"/>
      <parameter key="generic_name_stem" value="att"/>
    </operator>

    <!-- Concatenates the generically named columns into "Attnew", separated by ";".
         The regular expression selects every attribute named "att" followed by digits, replacing
         the former fixed subset att1|att2|att3 (kept below, but inactive under this filter type).
         NOTE(review): ignore_missings=false and keep_all=false here, whereas "Concatenation Normal"
         uses true for both. Presumably rows without a value in every pivoted column then get a
         missing Attnew; confirm this difference is intended. -->
    <operator activated="true" class="generate_aggregation" compatibility="6.5.002" expanded="true" height="82" name="Concatenation by Renaming" origin="GENERATED_TUTORIAL" width="90" x="447" y="34">
      <parameter key="attribute_name" value="Attnew"/>
      <parameter key="attribute_filter_type" value="regular_expression"/>
      <parameter key="attribute" value="concatenation(Att1)_car"/>
      <parameter key="attributes" value="att1|att2|att3"/>
      <parameter key="regular_expression" value="att[0-9]+"/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="polynominal"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="polynominal"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="true"/>
      <parameter key="aggregation_function" value="concatenation"/>
      <parameter key="concatenation_separator" value=";"/>
      <parameter key="keep_all" value="false"/>
      <parameter key="ignore_missings" value="false"/>
      <parameter key="ignore_missing_attributes" value="false"/>
    </operator>

    <!-- Chain 2: pivot and concatenate directly, without renaming. -->
    <operator activated="true" class="retrieve" compatibility="9.7.000" expanded="true" height="68" name="Retrieve combine (2)" width="90" x="45" y="187">
      <parameter key="repository_entry" value="//Local Repository/Datensatz V2/combine "/>
    </operator>

    <!-- Same pivot settings as the "Pivot" operator of chain 1. -->
    <operator activated="true" class="blending:pivot" compatibility="9.7.000" expanded="true" height="82" name="Pivot (2)" width="90" x="179" y="187">
      <parameter key="group_by_attributes" value="ID"/>
      <parameter key="column_grouping_attribute" value="Att1"/>
      <list key="aggregation_attributes">
        <parameter key="Att1" value="concatenation"/>
      </list>
      <parameter key="use_default_aggregation" value="false"/>
      <parameter key="default_aggregation_function" value="first"/>
    </operator>

    <!-- Concatenates the pivoted columns into "Attnew", separated by ";", keeping all other
         attributes and ignoring missing values.
         The regular expression matches every attribute whose name starts with the prefix
         "concatenation(Att1)_" (parentheses escaped), replacing the former fixed subset of the
         bus/car/truck columns (kept below, but inactive under this filter type). -->
    <operator activated="true" class="generate_aggregation" compatibility="6.5.002" expanded="true" height="82" name="Concatenation Normal" origin="GENERATED_TUTORIAL" width="90" x="313" y="187">
      <parameter key="attribute_name" value="Attnew"/>
      <parameter key="attribute_filter_type" value="regular_expression"/>
      <parameter key="attribute" value="concatenation(Att1)_car"/>
      <parameter key="attributes" value="concatenation(Att1)_bus|concatenation(Att1)_car|concatenation(Att1)_truck"/>
      <parameter key="regular_expression" value="concatenation\(Att1\)_.*"/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="polynominal"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="polynominal"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="true"/>
      <parameter key="aggregation_function" value="concatenation"/>
      <parameter key="concatenation_separator" value=";"/>
      <parameter key="keep_all" value="true"/>
      <parameter key="ignore_missings" value="true"/>
      <parameter key="ignore_missing_attributes" value="false"/>
    </operator>

    <!-- Wiring of chain 1; its output is delivered on result port 3. -->
    <connect from_op="Retrieve combine " from_port="output" to_op="Pivot" to_port="input"/>
    <connect from_op="Pivot" from_port="output" to_op="Rename by Generic Names" to_port="example set input"/>
    <connect from_op="Rename by Generic Names" from_port="example set output" to_op="Concatenation by Renaming" to_port="example set input"/>
    <connect from_op="Concatenation by Renaming" from_port="example set output" to_port="result 3"/>

    <!-- Wiring of chain 2; its output is delivered on result port 1. -->
    <connect from_op="Retrieve combine (2)" from_port="output" to_op="Pivot (2)" to_port="input"/>
    <connect from_op="Pivot (2)" from_port="output" to_op="Concatenation Normal" to_port="example set input"/>
    <connect from_op="Concatenation Normal" from_port="example set output" to_port="result 1"/>

    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    <portSpacing port="sink_result 4" spacing="0"/>
  </process>
</operator>
</process>