RapidMiner

how to get "aggregate output table" with some "count cell" is 0 ?

SOLVED
Highlighted
Contributor II dkpengqiuyang
Contributor II

how to get "aggregate output table" with some "count cell" is 0 ?

greeting,

how can I get a table with aggregate even if some of the rows has a count = 0?

I have a table like this, which show test result of samples in 3 days:

time  result

301    ok

301    ng

302    ok

303    ok

303    ok

303    ng

and I want a aggregate table like this:

time   ok   ng

301     1     1

302     1     0

303     2     1

but infact I get this, which 302 is missing:

time  ok  ng

301    1     1

303    2     1

and my program is show as the jpg.

I use filter to get ok and ng sample set, aggregate with "count" function, then join ok and ng to one table.

 

so how can I get a table with 302 ?

thank you for your help.

2 REPLIES
RM Staff
RM Staff
Solution

Re: how to get "aggregate output table" with some "count cell" is 0 ?

Hi,

i think Aggregate in combination with pivot does the job. See attached process.

 

Best,

Martin

<?xml version="1.0" encoding="UTF-8"?><process version="7.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.4.000" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="subprocess" compatibility="7.4.000" expanded="true" height="82" name="Subprocess" width="90" x="45" y="34">
        <process expanded="true">
          <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
            <list key="attribute_values">
              <parameter key="time" value="301"/>
              <parameter key="result" value="&quot;ok&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
            <list key="attribute_values">
              <parameter key="time" value="301"/>
              <parameter key="result" value="&quot;ng&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="45" y="238">
            <list key="attribute_values">
              <parameter key="time" value="302"/>
              <parameter key="result" value="&quot;ok&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="45" y="340">
            <list key="attribute_values">
              <parameter key="time" value="302"/>
              <parameter key="result" value="&quot;ok&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (5)" width="90" x="45" y="442">
            <list key="attribute_values">
              <parameter key="time" value="303"/>
              <parameter key="result" value="&quot;ok&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (6)" width="90" x="45" y="544">
            <list key="attribute_values">
              <parameter key="time" value="303"/>
              <parameter key="result" value="&quot;ng&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.4.000" expanded="true" height="166" name="Append" width="90" x="246" y="340"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Generate Data by User Specification (6)" from_port="output" to_op="Append" to_port="example set 5"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="aggregate" compatibility="7.4.000" expanded="true" height="82" name="Aggregate" width="90" x="179" y="34">
        <list key="aggregation_attributes">
          <parameter key="result" value="count"/>
        </list>
        <parameter key="group_by_attributes" value="time|result"/>
      </operator>
      <operator activated="true" class="pivot" compatibility="7.4.000" expanded="true" height="82" name="Pivot" width="90" x="313" y="34">
        <parameter key="group_attribute" value="time"/>
        <parameter key="index_attribute" value="result"/>
        <parameter key="weight_aggregation" value="count"/>
        <parameter key="skip_constant_attributes" value="false"/>
      </operator>
      <operator activated="true" class="replace_missing_values" compatibility="7.4.000" expanded="true" height="103" name="Replace Missing Values" width="90" x="514" y="34">
        <parameter key="default" value="zero"/>
        <list key="columns"/>
      </operator>
      <connect from_op="Subprocess" from_port="out 1" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Pivot" to_port="example set input"/>
      <connect from_op="Pivot" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
      <connect from_op="Replace Missing Values" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
--------------------------------------------------------------------------
Head of Data Science Services at RapidMiner
Contributor II dkpengqiuyang
Contributor II

Re: how to get "aggregate output table" with some "count cell" is 0 ?

i use generate attribute with if([confidence(OK)]>=0.5,1,0) to transform the prediction result from word into data, then use aggregate with sum, to get a full date table.

thank you for your help.