🎉 🎉   RAPIDMINER 9.5 BETA IS OUT!!!   🎉 🎉

GRAB THE HOTTEST NEW BETA OF RAPIDMINER STUDIO, SERVER, AND RADOOP. LET US KNOW WHAT YOU THINK!

CLICK HERE TO DOWNLOAD

🦉 🎤   RapidMiner Wisdom 2020 - CALL FOR SPEAKERS   🦉 🎤

We are inviting all community members to submit proposals to speak at Wisdom 2020 in Boston.


Whether it's a cool RapidMiner trick or a use case implementation, we want to see what you have.
The form link is below, and the deadline for submissions is November 15. See you in Boston!

CLICK HERE TO GO TO ENTRY FORM

filter Numerical Label by top 75% quartile

msacs09msacs09 Member Posts: 50 Contributor II
edited July 10 in Help
How do I filter a numerical label by its 75th percentile (upper quartile), so that the numerical label is converted to a binominal one (i.e., either at or above the 75th percentile or not)?

Answers

  • lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 784   Unicorn
    edited December 2018
    Hi @msacs09,

    It's far-fetched, but does this process meet your needs?

    Edit: You need RapidMiner 9.1 (beta) to execute this process.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.1.000-BETA2">
      <!--
        Purpose: flag each example of the sample Golf data set according to
        whether its Temperature is at or above the 75th percentile of
        Temperature. The flag is written to a new attribute named
        temperature_binominal with the values 1 (at or above) and 0 (below).

        Data flow, as wired by the connect elements near the end of the file:
          Retrieve Golf, then Select Attributes, then Aggregate;
          Aggregate "example set output", then Generate ID, then Union input 1;
          Aggregate "original", then Generate ID (2), then Union input 2;
          Union, then Replace Missing Values, then Filter Example Range,
          then Generate Attributes, then process result 1.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.1.000-BETA2" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Step 1: load the Golf sample data from the repository. -->
          <operator activated="true" class="retrieve" compatibility="9.1.000-BETA2" expanded="true" height="68" name="Retrieve Golf" width="90" x="45" y="85">
            <parameter key="repository_entry" value="//Samples/data/Golf"/>
          </operator>
          <!-- Step 2: keep the single attribute Temperature. Special attributes are
               not included, so any label or id column of the input is presumably
               dropped here (confirm against the Golf data set). -->
          <operator activated="true" class="select_attributes" compatibility="9.1.000-BETA2" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="85">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Temperature"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- Step 3: aggregate Temperature with the function "percentile (75)".
               No group_by attributes are set. Later steps refer to the result
               column as "percentile (75)(Temperature)". Both output ports are
               used: the aggregated result and the untouched "original" data. -->
          <operator activated="true" class="aggregate" compatibility="9.1.000-BETA2" expanded="true" height="82" name="Aggregate" width="90" x="313" y="85">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="Temperature" value="percentile (75)"/>
            </list>
            <parameter key="group_by_attributes" value=""/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <!-- Step 4a: add a non-nominal id to the original data
               (fed from the "original" port of Aggregate). -->
          <operator activated="true" class="generate_id" compatibility="9.1.000-BETA2" expanded="true" height="82" name="Generate ID (2)" width="90" x="447" y="136">
            <parameter key="create_nominal_ids" value="false"/>
            <parameter key="offset" value="0"/>
          </operator>
          <!-- Step 4b: add a non-nominal id to the aggregated result
               (fed from the "example set output" port of Aggregate). -->
          <operator activated="true" class="generate_id" compatibility="9.1.000-BETA2" expanded="true" height="82" name="Generate ID" width="90" x="447" y="34">
            <parameter key="create_nominal_ids" value="false"/>
            <parameter key="offset" value="0"/>
          </operator>
          <!-- Step 5: union of the aggregated result (input 1) and the original
               data (input 2).
               NOTE(review): the following steps appear to rely on the aggregate
               row being the first example of the union and on the percentile
               column being missing for the original rows; confirm against the
               Union operator documentation. -->
          <operator activated="true" class="union" compatibility="9.1.000-BETA2" expanded="true" height="82" name="Union" width="90" x="581" y="85"/>
          <!-- Step 6: replace missing values of "percentile (75)(Temperature)" with
               the average of that attribute. Presumably only the aggregate row
               holds a value at this point, so the average equals the percentile
               and every row receives it (confirm). -->
          <operator activated="true" class="replace_missing_values" compatibility="9.1.000-BETA2" expanded="true" height="103" name="Replace Missing Values" width="90" x="719" y="85">
            <parameter key="return_preprocessing_model" value="false"/>
            <parameter key="create_view" value="false"/>
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="percentile (75)(Temperature)"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default" value="average"/>
            <list key="columns">
              <parameter key="percentile (75)(Temperature)" value="average"/>
            </list>
          </operator>
          <!-- Step 7: example range 1 to 1 with invert_filter set to true, i.e. the
               selection is inverted. Presumably this removes the first example
               (the helper row that came from Aggregate) and keeps the original
               data rows (confirm). -->
          <operator activated="true" class="filter_example_range" compatibility="9.1.000-BETA2" expanded="true" height="82" name="Filter Example Range" width="90" x="853" y="85">
            <parameter key="first_example" value="1"/>
            <parameter key="last_example" value="1"/>
            <parameter key="invert_filter" value="true"/>
          </operator>
          <!-- Step 8: create temperature_binominal, which is 1 when Temperature is
               greater than or equal to the percentile column and 0 otherwise.
               keep_all is true, so the existing attributes are retained. -->
          <operator activated="true" class="generate_attributes" compatibility="9.1.000-BETA2" expanded="true" height="82" name="Generate Attributes" width="90" x="987" y="85">
            <list key="function_descriptions">
              <parameter key="temperature_binominal" value="if(Temperature&gt;=[percentile (75)(Temperature)],1,0)"/>
            </list>
            <parameter key="keep_all" value="true"/>
          </operator>
          <!-- Wiring between the operators above; the final connection delivers
               the flagged example set to the first result port of the process. -->
          <connect from_op="Retrieve Golf" from_port="output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="original" to_op="Generate ID (2)" to_port="example set input"/>
          <connect from_op="Generate ID (2)" from_port="example set output" to_op="Union" to_port="example set 2"/>
          <connect from_op="Generate ID" from_port="example set output" to_op="Union" to_port="example set 1"/>
          <connect from_op="Union" from_port="union" to_op="Replace Missing Values" to_port="example set input"/>
          <connect from_op="Replace Missing Values" from_port="example set output" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>


    Regards,

    Lionel

  • Telcontar120Telcontar120 Moderator, RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,262   Unicorn
    There is an operator called Winsorize, available in the free Finance and Economics extension, which basically does this.  More precisely, it lets you cap values at the 75th percentile (or whatever percentile you select); you could then use the standard Numerical to Binominal operator to convert the result to a 0/1 flag based on the value.

    Brian T.
    Lindon Ventures 
    Data Science Consulting from Certified RapidMiner Experts
    mschmitz
  • msacs09msacs09 Member Posts: 50 Contributor II
    Thank you all
Sign In or Register to comment.