Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you previously used to log in to the RapidMiner community. This will ensure that any previously created content is synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
[SOLVED] Replace missing value with subgroup average
Hi to all,
I'm a new RapidMiner user.
I started using some packages and immediately got stuck on a problem.
I want to replace some missing values with the average of the same attribute, grouped by another attribute. For example, I have
hour - value 1 - value 2
1 - 10 - 20
2 - 15 - 25
3 - 32 - 8
1 - 12 - 18
2 - 10 - 29
3 - 27 - 11
1 - 5 - 24
2 - 14 - 20
3 - 10 - 3
1 - ? - ?
should become
1 - (10+12+5)/3 - (20+18+24)/3
Is there a way to do this?
thanks all
I'm a new RapidMiner user.
I started using some packages and immediately got stuck on a problem.
I want to replace some missing values with the average of the same attribute, grouped by another attribute. For example, I have
hour - value 1 - value 2
1 - 10 - 20
2 - 15 - 25
3 - 32 - 8
1 - 12 - 18
2 - 10 - 29
3 - 27 - 11
1 - 5 - 24
2 - 14 - 20
3 - 10 - 3
1 - ? - ?
should become
1 - (10+12+5)/3 - (20+18+24)/3
Is there a way to do this?
thanks all
0
Answers
You can do this using the Loop Values operator and then doing the replacement for each group. Attached is an example process.
Cheers,
Martin
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Demo process: replace missing values of att2 separately for each group,
  where a group is one distinct value of att1.
  Flow: build a small example set, loop over every value of att1, inside the
  loop keep only the rows of that group and replace the missing att2 values,
  then append the per-group results into a single example set.
-->
<process version="6.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.4.000" expanded="true" name="Process">
    <process expanded="true">

      <!--
        Step 1: demo data. This subprocess only exists to produce an example
        set that contains a grouping attribute (att1) and a missing value in
        att2. A breakpoint is set after it (breakpoints="after"), presumably
        so the generated data can be inspected before the loop runs.
      -->
      <operator activated="true" breakpoints="after" class="subprocess" compatibility="6.4.000"
                expanded="true" height="76" name="Generate Data (2)" width="90" x="45" y="75">
        <process expanded="true">
          <!-- 5 examples with 3 attributes. -->
          <operator activated="true" class="generate_data" compatibility="6.4.000"
                    expanded="true" height="60" name="Generate Data" width="90" x="179" y="75">
            <parameter key="number_examples" value="5"/>
            <parameter key="number_of_attributes" value="3"/>
          </operator>
          <!-- Turn att1 into bins with short range names; att1 becomes the grouping attribute. -->
          <operator activated="true" class="discretize_by_bins" compatibility="6.4.000"
                    expanded="true" height="94" name="Discretize" width="90" x="313" y="75">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="att1"/>
            <parameter key="range_name_type" value="short"/>
          </operator>
          <!-- Write the sentinel value -1 into att2 of the example at index 2. -->
          <operator activated="true" class="set_data" compatibility="6.4.000"
                    expanded="true" height="76" name="Set Data" width="90" x="447" y="75">
            <parameter key="example_index" value="2"/>
            <parameter key="attribute_name" value="att2"/>
            <parameter key="value" value="-1"/>
            <list key="additional_values"/>
          </operator>
          <!--
            Declare the numeric value -1.0 as missing, which turns the sentinel
            written above into a missing value. No attribute filter is set
            here, so the affected attributes depend on the operator default
            (TODO confirm).
          -->
          <operator activated="true" class="declare_missing_value" compatibility="6.4.000"
                    expanded="true" height="76" name="Declare Missing Value" width="90" x="581" y="75">
            <parameter key="numeric_value" value="-1.0"/>
          </operator>
          <!-- Linear chain: Generate Data, Discretize, Set Data, Declare Missing Value, out 1. -->
          <connect from_op="Generate Data" from_port="output" to_op="Discretize" to_port="example set input"/>
          <connect from_op="Discretize" from_port="example set output" to_op="Set Data" to_port="example set input"/>
          <connect from_op="Set Data" from_port="example set output" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Just generate an example set for demo purposes</description>
      </operator>

      <!--
        Step 2: iterate over the values of att1. The inner filter refers to
        the current value through the %{loop_value} macro.
      -->
      <operator activated="true" class="loop_values" compatibility="6.4.000"
                expanded="true" height="94" name="Loop Values" width="90" x="313" y="75">
        <parameter key="attribute" value="att1"/>
        <process expanded="true">
          <!--
            Copy the incoming example set: output 1 is passed through unchanged
            to out 1, output 2 feeds the per-group branch below. The outer
            process only connects out 2.
          -->
          <operator activated="true" class="multiply" compatibility="6.4.000"
                    expanded="true" height="94" name="Multiply" width="90" x="45" y="30"/>
          <!-- Keep only the rows whose att1 equals the value of the current iteration. -->
          <operator activated="true" class="filter_examples" compatibility="6.4.000"
                    expanded="true" height="94" name="Filter Examples" width="90" x="246" y="165">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="att1.equals.%{loop_value}"/>
            </list>
            <description align="center" color="transparent" colored="false" width="126">Filter for each range</description>
          </operator>
          <!--
            Replace missing values in att2 only, computed on the filtered group.
            No replacement function is set explicitly; the description says
            average, presumably the operator default (TODO confirm).
          -->
          <operator activated="true" class="replace_missing_values" compatibility="6.4.000"
                    expanded="true" height="94" name="Replace Missing Values" width="90" x="380" y="165">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="att2"/>
            <list key="columns"/>
            <description align="center" color="transparent" colored="false" width="126">Replace by Average</description>
          </operator>
          <connect from_port="example set" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_port="out 1"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
          <connect from_op="Replace Missing Values" from_port="example set output" to_port="out 2"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
          <portSpacing port="sink_out 3" spacing="0"/>
        </process>
      </operator>

      <!-- Step 3: merge the per-group example sets coming from Loop Values (out 2) into one. -->
      <operator activated="true" class="append" compatibility="6.4.000"
                expanded="true" height="76" name="Append" width="90" x="447" y="75"/>

      <!-- Top-level wiring: demo data, Loop Values, Append, result 1. -->
      <connect from_op="Generate Data (2)" from_port="out 1" to_op="Loop Values" to_port="example set"/>
      <connect from_op="Loop Values" from_port="out 2" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Dortmund, Germany