🎉 🎉 RAPIDMINER 9.10 IS OUT!!! 🎉🎉

Download the latest version helping analytics teams accelerate time-to-value for streaming and IIOT use cases.

CLICK HERE TO DOWNLOAD

How can I aggregate a maximum from all the columns that contain confidence?

PrenticePrentice Member Posts: 66  Maven
edited June 2019 in Help
Hi,

Ok, so I've asked this question twice before together with another question (which was probably not the best idea) and never got an answer for it. 

So I've got a model that does a second prediction if the confidence is lower than 0.7. For this I aggregated the confidences and made a new attribute with the maximum of these. Which is where the second prediction is based on. 
The problem is that I've now manually selected the confidence attributes, but in a case with text categorisation these confidence attributes are words which are variable. So my question is how can I aggregate a maximum from all the columns that contain confidence(*word*), which in the case of this example would be the three flowers. I've tried to do it with regular expressions, but it just doesn't work for me. I've also tried generate attributes or the regular aggregate but I just don't know how.

<?xml version="1.0" encoding="UTF-8"?><process version="9.2.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.2.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="9.2.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator activated="true" class="split_data" compatibility="9.2.001" expanded="true" height="103" name="Split Data" width="90" x="179" y="85">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.66"/>
          <parameter key="ratio" value="0.34"/>
        </enumeration>
        <parameter key="sampling_type" value="automatic"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
      <operator activated="true" class="naive_bayes" compatibility="9.2.001" expanded="true" height="82" name="Naive Bayes" width="90" x="313" y="34">
        <parameter key="laplace_correction" value="true"/>
      </operator>
      <operator activated="true" class="apply_model" compatibility="9.2.001" expanded="true" height="82" name="Apply Model" width="90" x="313" y="136">
        <list key="application_parameters"/>
        <parameter key="create_view" value="false"/>
      </operator>
      <operator activated="true" class="generate_aggregation" compatibility="9.2.001" expanded="true" height="82" name="Generate Aggregation (3)" width="90" x="447" y="238">
        <parameter key="attribute_name" value="Maximum"/>
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="confidence(Iris-setosa)|confidence(Iris-versicolor)|confidence(Iris-virginica)"/>
        <parameter key="regular_expression" value="confidence(Iris-setosa)"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="aggregation_function" value="maximum"/>
        <parameter key="concatenation_separator" value="|"/>
        <parameter key="keep_all" value="true"/>
        <parameter key="ignore_missings" value="true"/>
        <parameter key="ignore_missing_attributes" value="false"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="9.2.001" expanded="true" height="103" name="Filter Examples" width="90" x="581" y="136">
        <parameter key="parameter_expression" value=""/>
        <parameter key="condition_class" value="custom_filters"/>
        <parameter key="invert_filter" value="false"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="Maximum.lt.0\.7"/>
        </list>
        <parameter key="filters_logic_and" value="true"/>
        <parameter key="filters_check_metadata" value="true"/>
      </operator>
      <operator activated="true" class="generate_prediction_ranking" compatibility="9.2.001" expanded="true" height="82" name="Generate Prediction Ranking" width="90" x="581" y="34">
        <parameter key="number_of_ranks" value="2"/>
        <parameter key="remove_old_predictions" value="true"/>
      </operator>
      <operator activated="true" class="rename_by_replacing" compatibility="9.2.001" expanded="true" height="82" name="Rename by Replacing" width="90" x="715" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="prediction(label)_1"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="replace_what" value="_1"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="9.2.001" expanded="true" height="82" name="Set Role" width="90" x="849" y="34">
        <parameter key="attribute_name" value="prediction(label)"/>
        <parameter key="target_role" value="prediction"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="union" compatibility="9.2.001" expanded="true" height="82" name="Union" width="90" x="849" y="187"/>
      <connect from_op="Retrieve Iris" from_port="output" to_op="Split Data" to_port="example set"/>
      <connect from_op="Split Data" from_port="partition 1" to_op="Naive Bayes" to_port="training set"/>
      <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Naive Bayes" from_port="model" to_op="Apply Model" to_port="model"/>
      <connect from_op="Apply Model" from_port="labelled data" to_op="Generate Aggregation (3)" to_port="example set input"/>
      <connect from_op="Generate Aggregation (3)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Generate Prediction Ranking" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="unmatched example set" to_op="Union" to_port="example set 2"/>
      <connect from_op="Generate Prediction Ranking" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
      <connect from_op="Rename by Replacing" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Union" to_port="example set 1"/>
      <connect from_op="Union" from_port="union" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>


Thanks for all the help and support lately, I know I've been asking a lot of questions and I really appreciate the help.

-Prentice

Best Answer

  • lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,185   Unicorn
    Solution Accepted
    @Prentice

    Ah OK, I think I understood.
    Can you try this setting of the Generate Aggregation operator ?


    Does this solution answer to your need ?

    Regards,
    Lionel
    Prenticesgenzer

Answers

  • lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,185   Unicorn
    Hi @Prentice

    I dit not understand what you want to do.
    Can you explain by giving an example ?

    Regards,

    Lionel
    Prentice
  • PrenticePrentice Member Posts: 66  Maven
    Hello @lionelderkrikor


    I've simplified my example. So right now, with "Generate Aggregation" I've added an attribute "Maximum" which gives the maximum value of the three prediction confidences (Iris-setosa, Iris-versicolor, Iris-virginica). 

    I've got the "attribute filter type" set to subset, where I have to manually select the attributes of which I want to contain within the maximum, which are the three flower types confidences. 

    However, in my case, the categories/labels (flower types) are variable, so with a new exampleset I've got a new subset of categories/labels, but I don't want to manually select them each time in the parameter settings. 

    So what I thought was that if there was a way to create an attribute "Maximum" with only the attributes containing the word confidence (since that's what the categories/labels are expressed as (confidence(Iris-setosa)), this way it's variable when new categories/labels are presented.


    <?xml version="1.0" encoding="UTF-8"?><process version="9.2.001"><br>&nbsp; <context><br>&nbsp;&nbsp;&nbsp; <input/><br>&nbsp;&nbsp;&nbsp; <output/><br>&nbsp;&nbsp;&nbsp; <macros/><br>&nbsp; </context><br>&nbsp; <operator activated="true" class="process" compatibility="9.2.001" expanded="true" name="Process"><br>&nbsp;&nbsp;&nbsp; <parameter key="logverbosity" value="init"/><br>&nbsp;&nbsp;&nbsp; <parameter key="random_seed" value="2001"/><br>&nbsp;&nbsp;&nbsp; <parameter key="send_mail" value="never"/><br>&nbsp;&nbsp;&nbsp; <parameter key="notification_email" value=""/><br>&nbsp;&nbsp;&nbsp; <parameter key="process_duration_for_mail" value="30"/><br>&nbsp;&nbsp;&nbsp; <parameter key="encoding" value="UTF-8"/><br>&nbsp;&nbsp;&nbsp; <process expanded="true"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <operator activated="true" class="retrieve" compatibility="9.2.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="repository_entry" value="//Samples/data/Iris"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </operator><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <operator activated="true" class="split_data" compatibility="9.2.001" expanded="true" height="103" name="Split Data" width="90" x="179" y="85"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <enumeration key="partitions"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="ratio" value="0.66"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="ratio" value="0.34"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </enumeration><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="sampling_type" value="automatic"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="use_local_random_seed" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="local_random_seed" value="1992"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </operator><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <operator activated="true" class="naive_bayes" compatibility="9.2.001" expanded="true" height="82" name="Naive Bayes" width="90" x="313" y="34"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="laplace_correction" value="true"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </operator><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <operator activated="true" class="apply_model" compatibility="9.2.001" expanded="true" height="82" name="Apply Model" width="90" x="447" y="85"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <list key="application_parameters"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="create_view" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </operator><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <operator activated="true" class="generate_aggregation" compatibility="9.2.001" expanded="true" height="82" name="Generate Aggregation (3)" width="90" x="581" y="85"><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="attribute_name" value="Maximum"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="attribute_filter_type" value="subset"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="attribute" value=""/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="attributes" value="confidence(Iris-setosa)|confidence(Iris-versicolor)|confidence(Iris-virginica)"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="regular_expression" value="confidence(Iris-setosa)"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="use_except_expression" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="value_type" value="attribute_value"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="use_value_type_exception" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="except_value_type" value="time"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="block_type" value="attribute_block"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="use_block_type_exception" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="except_block_type" value="value_matrix_row_start"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="invert_selection" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="include_special_attributes" value="true"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="aggregation_function" value="maximum"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="concatenation_separator" value="|"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="keep_all" value="true"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="ignore_missings" value="true"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <parameter key="ignore_missing_attributes" value="false"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </operator><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Retrieve Iris" from_port="output" to_op="Split Data" to_port="example set"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Split Data" from_port="partition 1" to_op="Naive Bayes" to_port="training set"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Naive Bayes" from_port="model" to_op="Apply Model" to_port="model"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Apply Model" from_port="labelled data" to_op="Generate Aggregation (3)" to_port="example set input"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <connect from_op="Generate Aggregation (3)" from_port="example set output" to_port="result 1"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <portSpacing port="source_input 1" spacing="0"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <portSpacing port="sink_result 1" spacing="0"/><br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <portSpacing port="sink_result 2" spacing="0"/><br>&nbsp;&nbsp;&nbsp; </process><br>&nbsp; </operator><br></process>


    I hope this clears things up a bit

  • lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,185   Unicorn
    @Prentice

    Did you try the solution I provided in my previous post ?

    Regards,

    Lionel
    Prentice
  • PrenticePrentice Member Posts: 66  Maven
    @lionelderkrikor

    Yes! That's it, that's exactly what I mean!
    Thank you! I'm really bad with regex.
Sign In or Register to comment.