How can I aggregate a maximum from all the columns that contain confidence?

Prentice · April 2019

Hi,

Ok, so I've asked this question twice before together with another question (which was probably not the best idea) and never got an answer for it.

So I've got a model that does a second prediction if the confidence is lower than 0.7. For this I aggregated the confidences and made a new attribute with the maximum of these. Which is where the second prediction is based on.
The problem is that I've now manually selected the confidence attributes, but in a case with text categorisation these confidence attributes are words which are variable. So my question is how can I aggregate a maximum from all the columns that contain confidence(*word*), which in the case of this example would be the three flowers. I've tried to do it with regular expressions, but it just doesn't work for me. I've also tried generate attributes or the regular aggregate but I just don't know how.

<?xml version="1.0" encoding="UTF-8"?><process version="9.2.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.2.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="9.2.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator activated="true" class="split_data" compatibility="9.2.001" expanded="true" height="103" name="Split Data" width="90" x="179" y="85">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.66"/>
          <parameter key="ratio" value="0.34"/>
        </enumeration>
        <parameter key="sampling_type" value="automatic"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
      <operator activated="true" class="naive_bayes" compatibility="9.2.001" expanded="true" height="82" name="Naive Bayes" width="90" x="313" y="34">
        <parameter key="laplace_correction" value="true"/>
      </operator>
      <operator activated="true" class="apply_model" compatibility="9.2.001" expanded="true" height="82" name="Apply Model" width="90" x="313" y="136">
        <list key="application_parameters"/>
        <parameter key="create_view" value="false"/>
      </operator>
      <operator activated="true" class="generate_aggregation" compatibility="9.2.001" expanded="true" height="82" name="Generate Aggregation (3)" width="90" x="447" y="238">
        <parameter key="attribute_name" value="Maximum"/>
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="confidence(Iris-setosa)|confidence(Iris-versicolor)|confidence(Iris-virginica)"/>
        <parameter key="regular_expression" value="confidence(Iris-setosa)"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="aggregation_function" value="maximum"/>
        <parameter key="concatenation_separator" value="|"/>
        <parameter key="keep_all" value="true"/>
        <parameter key="ignore_missings" value="true"/>
        <parameter key="ignore_missing_attributes" value="false"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="9.2.001" expanded="true" height="103" name="Filter Examples" width="90" x="581" y="136">
        <parameter key="parameter_expression" value=""/>
        <parameter key="condition_class" value="custom_filters"/>
        <parameter key="invert_filter" value="false"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="Maximum.lt.0\.7"/>
        </list>
        <parameter key="filters_logic_and" value="true"/>
        <parameter key="filters_check_metadata" value="true"/>
      </operator>
      <operator activated="true" class="generate_prediction_ranking" compatibility="9.2.001" expanded="true" height="82" name="Generate Prediction Ranking" width="90" x="581" y="34">
        <parameter key="number_of_ranks" value="2"/>
        <parameter key="remove_old_predictions" value="true"/>
      </operator>
      <operator activated="true" class="rename_by_replacing" compatibility="9.2.001" expanded="true" height="82" name="Rename by Replacing" width="90" x="715" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="prediction(label)_1"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="replace_what" value="_1"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="9.2.001" expanded="true" height="82" name="Set Role" width="90" x="849" y="34">
        <parameter key="attribute_name" value="prediction(label)"/>
        <parameter key="target_role" value="prediction"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="union" compatibility="9.2.001" expanded="true" height="82" name="Union" width="90" x="849" y="187"/>
      <connect from_op="Retrieve Iris" from_port="output" to_op="Split Data" to_port="example set"/>
      <connect from_op="Split Data" from_port="partition 1" to_op="Naive Bayes" to_port="training set"/>
      <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Naive Bayes" from_port="model" to_op="Apply Model" to_port="model"/>
      <connect from_op="Apply Model" from_port="labelled data" to_op="Generate Aggregation (3)" to_port="example set input"/>
      <connect from_op="Generate Aggregation (3)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Generate Prediction Ranking" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="unmatched example set" to_op="Union" to_port="example set 2"/>
      <connect from_op="Generate Prediction Ranking" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
      <connect from_op="Rename by Replacing" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Union" to_port="example set 1"/>
      <connect from_op="Union" from_port="union" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Thanks for all the help and support lately, I know I've been asking a lot of questions and I really appreciate the help.

-Prentice

lionelderkrikor · April 2019

@Prentice

Ah OK, I think I understood.
Can you try this setting of the Generate Aggregation operator ?

Image: https://us.v-cdn.net/6030995/uploads/editor/vf/lb9qw7njtjes.png

Does this solution answer to your need ?

Regards,
Lionel

lionelderkrikor · April 2019

Hi @Prentice

I dit not understand what you want to do.
Can you explain by giving an example ?

Regards,

Lionel

Prentice · April 2019

Hello @lionelderkrikor

I've simplified my example. So right now, with "Generate Aggregation" I've added an attribute "Maximum" which gives the maximum value of the three prediction confidences (Iris-setosa, Iris-versicolor, Iris-virginica).

I've got the "attribute filter type" set to subset, where I have to manually select the attributes of which I want to contain within the maximum, which are the three flower types confidences.

However, in my case, the categories/labels (flower types) are variable, so with a new exampleset I've got a new subset of categories/labels, but I don't want to manually select them each time in the parameter settings.

So what I thought was that if there was a way to create an attribute "Maximum" with only the attributes containing the word confidence (since that's what the categories/labels are expressed as (confidence(Iris-setosa)), this way it's variable when new categories/labels are presented.


<?xml version="1.0" encoding="UTF-8"?><process version="9.2.001"><br>&nbsp; <context><br>&nbsp;&nbsp;&nbsp; <input/><br>&nbsp;&nbsp;&nbsp; <output/><br>&nbsp;&nbsp;&nbsp; <macros/><br>&nbsp; </context><br>&nbsp; <operator activated="true" class="process" compatibility="9.2.001" expanded="true" name="Process"><br>&nbsp;&nbsp;&nbsp; <parameter key="logverbosity" value="init"/><br>&nbsp;&nbsp;&nbsp; <parameter key="random_seed" value="2001"/><br>&nbsp;&nbsp;&nbsp; <parameter key="send_mail" value="never"/><br>&nbsp;&nbsp;&nbsp; <parameter key="notification_email" value=""/><br>&nbsp;&nbsp;&nbsp; <parameter key="process_duration_for_mail" value="30"/><br>&nbsp;&nbsp;&nbsp; <parameter key="encoding" value="UTF-8"/><br>&nbsp;&nbsp;&nbsp; <process expanded="true"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <operator activated="true" class="retrieve" compatibility="9.2.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="repository_entry" value="//Samples/data/Iris"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </operator><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <operator activated="true" class="split_data" compatibility="9.2.001" expanded="true" height="103" name="Split Data" width="90" x="179" y="85"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <enumeration key="partitions"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="ratio" value="0.66"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="ratio" value="0.34"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </enumeration><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="sampling_type" value="automatic"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="use_local_random_seed" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="local_random_seed" value="1992"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </operator><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <operator activated="true" class="naive_bayes" compatibility="9.2.001" expanded="true" height="82" name="Naive Bayes" width="90" x="313" y="34"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="laplace_correction" value="true"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </operator><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <operator activated="true" class="apply_model" compatibility="9.2.001" expanded="true" height="82" name="Apply Model" width="90" x="447" y="85"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <list key="application_parameters"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="create_view" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </operator><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <operator activated="true" class="generate_aggregation" compatibility="9.2.001" expanded="true" height="82" name="Generate Aggregation (3)" width="90" x="581" y="85"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="attribute_name" value="Maximum"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="attribute_filter_type" value="subset"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="attribute" value=""/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="attributes" value="confidence(Iris-setosa)|confidence(Iris-versicolor)|confidence(Iris-virginica)"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="regular_expression" value="confidence(Iris-setosa)"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="use_except_expression" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="value_type" value="attribute_value"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="use_value_type_exception" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="except_value_type" value="time"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="block_type" value="attribute_block"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="use_block_type_exception" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="except_block_type" value="value_matrix_row_start"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="invert_selection" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="include_special_attributes" value="true"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="aggregation_function" value="maximum"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="concatenation_separator" value="|"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="keep_all" value="true"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="ignore_missings" value="true"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="ignore_missing_attributes" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </operator><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Retrieve Iris" from_port="output" to_op="Split Data" to_port="example set"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Split Data" from_port="partition 1" to_op="Naive Bayes" to_port="training set"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Naive Bayes" from_port="model" to_op="Apply Model" to_port="model"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Apply Model" from_port="labelled data" to_op="Generate Aggregation (3)" to_port="example set input"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Generate Aggregation (3)" from_port="example set output" to_port="result 1"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <portSpacing port="source_input 1" spacing="0"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <portSpacing port="sink_result 1" spacing="0"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <portSpacing port="sink_result 2" spacing="0"/><br>&nbsp;&nbsp;&nbsp; </process><br>&nbsp; </operator><br></process>

I hope this clears things up a bit

lionelderkrikor · April 2019

@Prentice

Did you try the solution I provided in my previous post ?

Regards,

Lionel

Prentice · April 2019

@lionelderkrikor

Yes! That's it, that's exactly what I mean!
Thank you! I'm really bad with regex.

Howdy, Stranger!

Quick Links

Categories

Altair RapidMiner Community

GET HELP. LEARN BEST PRACTICES. NETWORK WITH YOUR PEERS.

How can I aggregate a maximum from all the columns that contain confidence?

Best Answer

Answers