Options

If-else condition implementation in RapidMiner

Himanshu_PantHimanshu_Pant Member Posts: 46 Contributor I
I have a data frame which consists of 10000+ rows. It has one column 'abc' and another column 'date'. The 'abc' column contains many integer values. I want 400 consecutive rows where the value in 'abc' is greater than 2.

Example: I'll start with the first row and check the value in column 'abc'. If it's greater than 2, I'll keep the row and move to the next row. Let's say that in the 100th row the value in the 'abc' column is not greater than 2; the counter will then start again from the next row, looking for 400 consecutive rows where the value in 'abc' is greater than 2.

Best Answer

  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,507 RM Data Scientist
    Solution Accepted
    Hi,
    attached is an example. That should do it.

    Best,
    Martin


    <?xml version="1.0" encoding="UTF-8"?><process version="9.9.002">
      <!--
        RapidMiner process: find runs of consecutive examples whose "value" exceeds 2
        and keep only the runs that are at least 5 examples long.
        Active pipeline: Create ExampleSet, Generate Attributes (flag), Generate Session ID,
        Aggregate (count per session), Filter Examples (count >= 5), Join back to the rows.
        NOTE(review): 5 is the run length used in this demo; presumably it has to be
        raised to 400 for the use case described in the question. Confirm before reuse.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.9.002" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!--
            Demo data: 100 examples with one generated attribute "value" = 1.9+rand()
            plus an id attribute (add_id_attribute=true).
            NOTE(review): presumably replaced by the real data source in practice.
          -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.9.002" expanded="true" height="68" name="Create ExampleSet" width="90" x="45" y="187">
            <parameter key="generator_type" value="attribute functions"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions">
              <parameter key="value" value="1.9+rand()"/>
            </list>
            <parameter key="add_id_attribute" value="true"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- Adds "flag": 1 when value > 2, otherwise 0. Existing attributes are kept (keep_all=true). -->
          <operator activated="true" class="generate_attributes" compatibility="9.9.002" expanded="true" height="82" name="Generate Attributes" width="90" x="246" y="187">
            <list key="function_descriptions">
              <parameter key="flag" value="if(value &gt; 2, 1,0)"/>
            </list>
            <parameter key="keep_all" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Generate Flag attribute</description>
          </operator>
          <!--
            Feeds the 0/1 flag in as the "date" attribute with gap_threshold 0.5 and
            use_absolutes=true. Per the operator description below, the generated attribute
            counts up every time the flag changes; the intent is that each run of equal
            flags shares one session id. Aggregate and Join refer to it as "Session id".
          -->
          <operator activated="true" class="operator_toolbox:generate_session_id" compatibility="2.12.000-SNAPSHOT" expanded="true" height="82" name="Generate Session ID" width="90" x="380" y="187">
            <parameter key="date_attribute" value="flag"/>
            <parameter key="gap_threshold" value="0.5"/>
            <parameter key="gap_unit" value="none"/>
            <parameter key="use_absolutes" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Generate an attribute which is counting up, everytime the flag changes</description>
          </operator>
          <!--
            Counts "id" per "Session id" (use_default_aggregation=false, so only the
            aggregation listed under aggregation_attributes is configured).
            The aggregated output is wired to Filter Examples and the "original" port
            is wired to Join's right input (see the connect elements further down).
          -->
          <operator activated="true" class="aggregate" compatibility="9.9.002" expanded="true" height="82" name="Aggregate" width="90" x="514" y="187">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="id" value="count"/>
            </list>
            <parameter key="group_by_attributes" value="Session id"/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <!--
            Keeps only sessions with count(id) >= 5.
            NOTE(review): the filter does not test "flag", so a run of 5 or more examples
            with flag = 0 (value not above 2) would presumably pass as well. If only runs
            with value > 2 are wanted, an additional condition on flag is needed. Verify.
          -->
          <operator activated="true" class="filter_examples" compatibility="9.9.002" expanded="true" height="103" name="Filter Examples" width="90" x="648" y="34">
            <parameter key="parameter_expression" value=""/>
            <parameter key="condition_class" value="custom_filters"/>
            <parameter key="invert_filter" value="false"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="count(id).ge.5"/>
            </list>
            <parameter key="filters_logic_and" value="true"/>
            <parameter key="filters_check_metadata" value="true"/>
          </operator>
          <!--
            Inner join on "Session id": left input = the filtered session counts,
            right input = Aggregate's "original" output. The joined set is delivered
            to result 1.
          -->
          <operator activated="true" class="concurrency:join" compatibility="9.9.002" expanded="true" height="82" name="Join" width="90" x="849" y="187">
            <parameter key="remove_double_attributes" value="true"/>
            <parameter key="join_type" value="inner"/>
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="Session id" value="Session id"/>
            </list>
            <parameter key="keep_both_join_attributes" value="false"/>
          </operator>
          <!--
            Deactivated (activated="false"). Together with Loop Collection this is the
            alternative implementation referred to by the yellow note near the end of the
            process. No connection into this operator's input is defined in this process.
          -->
          <operator activated="false" class="operator_toolbox:group_into_collection" compatibility="2.12.000-SNAPSHOT" expanded="true" height="82" name="Group Into Collection" width="90" x="648" y="442">
            <parameter key="group_by_attribute" value="id"/>
            <parameter key="group_by_attribute (numerical)" value="Session id"/>
            <parameter key="sorting_order" value="numerical"/>
          </operator>
          <!--
            Deactivated. Each collection element is handed to the inner Branch, which is
            configured with condition_type min_examples and condition_value 5. The first
            Branch subprocess forwards the element (condition to input 1); the second
            subprocess defines no connections. The loop output is not wired to a result port.
          -->
          <operator activated="false" class="loop_collection" compatibility="9.9.002" expanded="true" height="82" name="Loop Collection" width="90" x="782" y="442">
            <parameter key="set_iteration_macro" value="false"/>
            <parameter key="macro_name" value="iteration"/>
            <parameter key="macro_start_value" value="1"/>
            <parameter key="unfold" value="false"/>
            <process expanded="true">
              <operator activated="true" class="branch" compatibility="9.9.002" expanded="true" height="82" name="Branch" width="90" x="581" y="34">
                <parameter key="condition_type" value="min_examples"/>
                <parameter key="condition_value" value="5"/>
                <parameter key="expression" value=""/>
                <parameter key="io_object" value="ANOVAMatrix"/>
                <parameter key="return_inner_output" value="true"/>
                <process expanded="true">
                  <connect from_port="condition" to_port="input 1"/>
                  <portSpacing port="source_condition" spacing="0"/>
                  <portSpacing port="source_input 1" spacing="0"/>
                  <portSpacing port="sink_input 1" spacing="0"/>
                  <portSpacing port="sink_input 2" spacing="0"/>
                </process>
                <process expanded="true">
                  <portSpacing port="source_condition" spacing="0"/>
                  <portSpacing port="source_input 1" spacing="0"/>
                  <portSpacing port="sink_input 1" spacing="0"/>
                  <portSpacing port="sink_input 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="single" to_op="Branch" to_port="condition"/>
              <connect from_op="Branch" from_port="input 1" to_port="output 1"/>
              <portSpacing port="source_single" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Wiring of the active pipeline, followed by the link between the two deactivated operators. -->
          <connect from_op="Create ExampleSet" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Generate Session ID" to_port="exa"/>
          <connect from_op="Generate Session ID" from_port="exa" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="original" to_op="Join" to_port="right"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Join" to_port="left"/>
          <connect from_op="Join" from_port="join" to_port="result 1"/>
          <connect from_op="Group Into Collection" from_port="col" to_op="Loop Collection" to_port="collection"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <description align="center" color="yellow" colored="false" height="199" resized="true" width="277" x="604" y="362">Alterantive implementation of keeping only subdatasets of length 5</description>
          <description align="center" color="yellow" colored="false" height="308" resized="true" width="509" x="497" y="10">Keep only sessions of length 5 or longer</description>
        </process>
      </operator>
    </process>




    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany

Answers

  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,507 RM Data Scientist
    I do not fully understand what you want. Is it something like this?


    def selectConsecutiveValues(values=None, threshold=2, run_length=400):
        """Return the first run of ``run_length`` consecutive values above ``threshold``.

        The input is scanned in order. Every value strictly greater than
        ``threshold`` extends the current run; any other value discards the run,
        and counting starts again with the next value.

        Args:
            values: Iterable of comparable values (e.g. the 'abc' column). When
                omitted, a module-level ``column`` is used, as in the original sketch.
            threshold: A value must be strictly greater than this to count.
            run_length: Number of consecutive qualifying values required.

        Returns:
            A list with the ``run_length`` qualifying values of the first complete
            run, or ``None`` when the input contains no such run.

        Raises:
            ValueError: If ``run_length`` is smaller than 1.
        """
        if run_length < 1:
            raise ValueError("run_length must be at least 1")
        if values is None:
            # Backward compatibility: the zero-argument call reads the global ``column``.
            values = column  # noqa: F821 (expected to be defined by the caller's module)

        selected_values = []
        for value in values:
            if value > threshold:
                selected_values.append(value)
                # Check right after appending so the scan stops at the first full run.
                if len(selected_values) == run_length:
                    return selected_values
            else:
                # Run broken: forget it and start counting again from the next value.
                selected_values.clear()

        return None



    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
  • Options
    Himanshu_PantHimanshu_Pant Member Posts: 46 Contributor I
    Correct. Eventually we want to get 400 consecutive points where the value is greater than 2.
Sign In or Register to comment.