Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
Generation of New Attribute - public question on support.rapidminer.com
JEdward
RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 578 Unicorn
Hi Krishna,
This is a response to your question here:
https://support.rapidminer.com/customer/en/portal/questions/13036889-generation-of-new-attribute?new=13036889
If I understand this correctly you do not begin with two columns of data.
You begin with 1 column (att1) and you want to calculate the value for att2 from the current att1 value and the att2 value of the previous row.
In Excel you might represent this as
30 =A1
40 =A2+(B1/2)
50 =A3+(B2/2)
With the results
30 30
40 55
50 77.5
In RapidMiner you would use the operator Loop Examples to step through each example in order and calculate the result.
Try this example XML and see if it works for you:
This is a response to your question here:
https://support.rapidminer.com/customer/en/portal/questions/13036889-generation-of-new-attribute?new=13036889
If I understand this correctly you do not begin with two columns of data.
You begin with 1 column (att1) and you want to calculate the value for att2 from the current att1 value and the att2 value of the previous row.
In Excel you might represent this as
30 =A1
40 =A2+(B1/2)
50 =A3+(B2/2)
With the results
30 30
40 55
50 77.5
In RapidMiner you would use the operator Loop Examples to step through each example in order and calculate the result.
Try this example XML and see if it works for you:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Example RapidMiner process: fills a new attribute att2 one example at a time so that
      att2[i] = att1[i] + 0.5 * att2[i-1]
  where the "previous att2" used for the first example is 0.

  Pipeline (top level): Generate Test Data, Read CSV, Generate Empty Attribute,
  Set Macro, Loop Examples. The previous att2 value is carried from one loop
  iteration to the next in a macro named att3.
-->
<process version="6.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.4.000" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="subprocess" compatibility="6.4.000" expanded="true" height="76" name="Generate Test Data" width="90" x="45" y="30">
        <process expanded="true">
          <operator activated="true" class="text:create_document" compatibility="6.4.001" expanded="true" height="60" name="Create Document" width="90" x="45" y="30">
            <!--
              One value per line: a header line (att1) followed by five numbers.
              The line breaks are written as numeric character references for a line feed,
              because a literal line break inside an attribute value is read back as a
              space, which would put the header and every value on a single CSV line.
            -->
            <parameter key="text" value="att1&#10;30&#10;40&#10;50&#10;10&#10;20&#10;"/>
          </operator>
          <operator activated="true" class="text:write_document" compatibility="6.4.001" expanded="true" height="76" name="Write Document" width="90" x="179" y="30"/>
          <connect from_op="Create Document" from_port="output" to_op="Write Document" to_port="document"/>
          <connect from_op="Write Document" from_port="file" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">This is some test data. You would normally swap this out with your own CSV.</description>
      </operator>
      <!-- Row 0 is annotated as the attribute name row; the single column is declared as integer attribute att1. -->
      <operator activated="true" class="read_csv" compatibility="6.4.000" expanded="true" height="60" name="Read CSV" width="90" x="179" y="30">
        <parameter key="column_separators" value=","/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="att1.true.integer.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="generate_empty_attribute" compatibility="6.4.000" expanded="true" height="76" name="Generate Empty Attribute" width="90" x="313" y="30">
        <parameter key="name" value="att2"/>
        <description align="center" color="transparent" colored="false" width="126">Here we create a blank attribute called att2. We will begin to populate this in two operators time.</description>
      </operator>
      <!-- Seeds the carry-over macro att3 with 0 so the first example yields att2 = att1. -->
      <operator activated="true" class="set_macro" compatibility="6.4.000" expanded="true" height="76" name="Set Macro" width="90" x="447" y="30">
        <parameter key="macro" value="att3"/>
        <parameter key="value" value="0"/>
        <description align="center" color="transparent" colored="false" width="126">This creates the att3 macro and ensures on the first example the value is 0.</description>
      </operator>
      <!--
        Per-example body, in order:
          1. read att1 of the current example into macro att1
          2. compute macro att2 = att1 + att3 * 0.5
          3. write macro att2 into attribute att2 of the current example
          4. read that att2 value back into macro att3 for the next iteration
        NOTE(review): the inner operators address the current example through the
        macro named example; no iteration macro name is set on this operator, so this
        presumably relies on the operator's default. Confirm if the default differs.
      -->
      <operator activated="true" class="loop_examples" compatibility="6.4.000" expanded="true" height="76" name="Loop Examples" width="90" x="581" y="30">
        <process expanded="true">
          <operator activated="true" class="extract_macro" compatibility="6.4.000" expanded="true" height="60" name="Extract Macro (2)" width="90" x="45" y="30">
            <parameter key="macro" value="att1"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="att1"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
            <description align="center" color="transparent" colored="false" width="126">Here the current value of att1 is extracted into a macro to use in the next operator</description>
          </operator>
          <operator activated="true" class="generate_macro" compatibility="6.4.000" expanded="true" height="76" name="Generate Macro" width="90" x="179" y="30">
            <list key="function_descriptions">
              <parameter key="att2" value="%{att1} + (%{att3} * 0.5)"/>
            </list>
            <description align="center" color="transparent" colored="false" width="126">This operator generates another macro called att2 which is the final calculation based on the att1 &amp; att3 macro values.</description>
          </operator>
          <operator activated="true" class="set_data" compatibility="6.4.000" expanded="true" height="76" name="Set Data" width="90" x="313" y="30">
            <parameter key="example_index" value="%{example}"/>
            <parameter key="attribute_name" value="att2"/>
            <parameter key="value" value="%{att2}"/>
            <list key="additional_values"/>
            <description align="center" color="transparent" colored="false" width="126">Next we set the value of the current example of att2 with the value of the macro %{att2}</description>
          </operator>
          <operator activated="true" class="extract_macro" compatibility="6.4.000" expanded="true" height="60" name="Extract Macro" width="90" x="447" y="30">
            <parameter key="macro" value="att3"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="att2"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
            <description align="center" color="transparent" colored="false" width="126">Lastly we extract the current value of the attribute att2 and store it as a macro named att3. This will then be remembered in the next loop.</description>
          </operator>
          <connect from_port="example set" to_op="Extract Macro (2)" to_port="example set"/>
          <connect from_op="Extract Macro (2)" from_port="example set" to_op="Generate Macro" to_port="through 1"/>
          <connect from_op="Generate Macro" from_port="through 1" to_op="Set Data" to_port="example set input"/>
          <connect from_op="Set Data" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_port="example set"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <description align="center" color="yellow" colored="false" height="128" resized="true" width="336" x="11" y="256">Because this is all happening in a loop there is no if statement required.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">This loops through all examples. Note the iteration macro to keep track of the number in the loop.</description>
      </operator>
      <connect from_op="Generate Test Data" from_port="out 1" to_op="Read CSV" to_port="file"/>
      <connect from_op="Read CSV" from_port="output" to_op="Generate Empty Attribute" to_port="example set input"/>
      <connect from_op="Generate Empty Attribute" from_port="example set output" to_op="Set Macro" to_port="through 1"/>
      <connect from_op="Set Macro" from_port="through 1" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="90"/>
    </process>
  </operator>
</process>
0
Answers
It worked as per requirement.