Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.

How to increase the number of examples in our ExampleSet?

mansour_ebrahim Member Posts: 22 Contributor II
edited August 2019 in Help
Hi all
Is there any operator in RapidMiner to increase the number of examples in the dataset? I mean an operator which generates more samples from all groups and increases the total number of examples in my dataset. I am running a DL model on my dataset, but the number of samples is not enough; I cannot collect more samples, so I have to generate more samples from all groups.
Also, I do not want to balance the number of samples across the classes; I just want to increase the size of the dataset, let's say threefold.
Regards.
Mansour

Answers

  • lionelderkrikor RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn
    Hi @mansour_ebrahim,

    You can play with the SMOTE Upsampling operator to increase the number of examples in your dataset.
    Here is an example of a process using this operator:

    <?xml version="1.0" encoding="UTF-8"?><process version="9.4.000-BETA">
      <!--
        Example RapidMiner process: build a small synthetic ExampleSet, give its
        "label" attribute the label role, convert that attribute with
        Numerical to Binominal, and pass the result to SMOTE Upsampling.
        Operator chain (wired by the <connect> elements near the end):
          Create ExampleSet, Set Role, Numerical to Binominal, SMOTE Upsampling, result 1
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <!--
        Root operator of the process. random_seed is set to 2001 here;
        presumably this makes the rand()-based data below repeatable between runs
        (TODO confirm against the RapidMiner documentation).
      -->
      <operator activated="true" class="process" compatibility="9.4.000-BETA" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!--
            Step 1: generate the input data.
            generator_type "attribute functions" with number_of_examples 100 and two
            attribute expressions:
              test  = 10*rand()
              label = if(test<5,0,1)   (0 when test is below 5, otherwise 1)
            No id attribute is added (add_id_attribute is false).
            NOTE(review): the series, date and separator parameters presumably belong
            to other generator types and have no effect here; verify.
          -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.4.000-BETA" expanded="true" height="68" name="Create ExampleSet" width="90" x="179" y="85">
            <parameter key="generator_type" value="attribute functions"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions">
              <parameter key="test" value="10*rand()"/>
              <parameter key="label" value="if(test&lt;5,0,1)"/>
            </list>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!--
            Step 2: assign the target role "label" to the attribute named "label".
            No additional roles are configured (set_additional_roles is empty).
          -->
          <operator activated="true" class="set_role" compatibility="9.4.000-BETA" expanded="true" height="82" name="Set Role" width="90" x="313" y="85">
            <parameter key="attribute_name" value="label"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!--
            Step 3: apply Numerical to Binominal to the single attribute "label"
            (attribute_filter_type "single"). include_special_attributes is true,
            presumably required because "label" now carries a special role (TODO confirm).
            NOTE(review): min and max are both 0.0; presumably values inside [min, max]
            map to one binominal value and all other values to the other. Confirm
            against the operator documentation.
            NOTE(review): breakpoints="after" is set on this operator, which presumably
            pauses execution after it when the process is run in RapidMiner Studio.
            Confirm whether that is intended for a shared example.
          -->
          <operator activated="true" breakpoints="after" class="numerical_to_binominal" compatibility="9.4.000-BETA" expanded="true" height="82" name="Numerical to Binominal" width="90" x="447" y="85">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="label"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="numeric"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="real"/>
            <parameter key="block_type" value="value_series"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_series_end"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="true"/>
            <parameter key="min" value="0.0"/>
            <parameter key="max" value="0.0"/>
          </operator>
          <!--
            Step 4: SMOTE Upsampling (class operator_toolbox:smote, compatibility 2.1.000,
            i.e. not a core operator; presumably requires the Operator Toolbox extension).
            Settings visible here: 5 neighbours, normalize on, equalize_classes off,
            upsampling_size 1000, minority class auto-detected, integers rounded.
            NOTE(review): with equalize_classes off, upsampling_size presumably sets how
            many synthetic examples are added to the minority class only; verify that
            this matches the goal of enlarging every class.
            use_local_random_seed is false, so local_random_seed 1992 is presumably unused.
          -->
          <operator activated="true" class="operator_toolbox:smote" compatibility="2.1.000" expanded="true" height="82" name="SMOTE Upsampling" width="90" x="581" y="85">
            <parameter key="number_of_neighbours" value="5"/>
            <parameter key="normalize" value="true"/>
            <parameter key="equalize_classes" value="false"/>
            <parameter key="upsampling_size" value="1000"/>
            <parameter key="auto_detect_minority_class" value="true"/>
            <parameter key="round_integers" value="true"/>
            <parameter key="nominal_change_rate" value="0.5"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
          </operator>
          <!--
            Wiring: each operator's output feeds the next operator's input, and the
            "ups" port of SMOTE Upsampling is delivered to the process port "result 1".
          -->
          <connect from_op="Create ExampleSet" from_port="output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Numerical to Binominal" to_port="example set input"/>
          <connect from_op="Numerical to Binominal" from_port="example set output" to_op="SMOTE Upsampling" to_port="exa"/>
          <connect from_op="SMOTE Upsampling" from_port="ups" to_port="result 1"/>
          <!-- Layout-only hints for the process canvas ports. -->
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    
    Hope this helps,

    Regards,

    Lionel



Sign In or Register to comment.