RAPIDMINER 9.7 BETA ANNOUNCEMENT

The beta program for the RapidMiner 9.7 release is now available. Lots of amazing new improvements including true version control!

CLICK HERE TO DOWNLOAD

Specific number of samples per category

online360 Member Posts: 34 Contributor I
edited November 2018 in Help
Dear everyone!

I'm currently trying to gain a deeper understanding of RapidMiner (RM).

I want to draw 10 samples per category from a data set that is a list of products. (A category can contain multiple products.)

What I did is the following:
retrieve the data
trim the data (as there are spaces before and after some words sometimes)
Set role "label" to the category attribute
Apply "Sample" Operator with absolute set to 10

Does anyone know why the result doesn't show any examples?

P.S.: ID is set to be the product-id

Thanks!
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process (version 7.0.001) posted with the original question.
     Operator chain, as wired by the <connect> elements below:
     Retrieve, then Trim, then Set Role, then Sample, then port "result 1". -->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Loads the product list from the repository entry given below. -->
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve t123_product_import_23032016" width="90" x="45" y="85">
        <parameter key="repository_entry" value="//tech123_win/t123_product_import_23032016"/>
      </operator>
      <!-- Trim is restricted to the single attribute "etim"; special attributes are included. -->
      <operator activated="true" class="trim" compatibility="7.0.001" expanded="true" height="82" name="Trim" width="90" x="112" y="187">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="etim"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- Gives the category attribute "etim" the role "label". -->
      <operator activated="true" class="set_role" compatibility="7.0.001" expanded="true" height="82" name="Set Role" width="90" x="246" y="85">
        <parameter key="attribute_name" value="etim"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- balance_data is enabled and the per-class size list names two classes only,
           EC000993 and EC000058, with a size of 5 each.
           NOTE(review): the post asks for 10 samples per category, but this list
           requests 5 and covers only two categories. How Sample treats label values
           that are not listed here is not visible in this file; confirm against the
           operator documentation. -->
      <operator activated="true" class="sample" compatibility="7.0.001" expanded="true" height="82" name="Sample" width="90" x="447" y="136">
        <parameter key="balance_data" value="true"/>
        <list key="sample_size_per_class">
          <parameter key="EC000993" value="5"/>
          <parameter key="EC000058" value="5"/>
        </list>
        <list key="sample_ratio_per_class"/>
        <list key="sample_probability_per_class"/>
      </operator>
      <connect from_op="Retrieve t123_product_import_23032016" from_port="output" to_op="Trim" to_port="example set input"/>
      <connect from_op="Trim" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Sample" to_port="example set input"/>
      <connect from_op="Sample" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:

Answers

  • JEdward RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 570   Unicorn
    Are you definitely able to find the products you are searching for with Filter Examples?  Maybe you need to trim them a little more.
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!-- Diagnostic variant of the question's process (version 7.0.001).
         Wiring per the <connect> elements below: Retrieve, then Trim, then Set Role,
         then Filter Examples, then port "result 1". The Sample operator is kept in
         the file but is deactivated and has no connections. -->
    <process version="7.0.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
        <process expanded="true">
          <!-- Loads the product list from the repository entry given below. -->
          <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve t123_product_import_23032016" width="90" x="45" y="85">
            <parameter key="repository_entry" value="//tech123_win/t123_product_import_23032016"/>
          </operator>
          <!-- Trim is restricted to the single attribute "etim"; special attributes are included. -->
          <operator activated="true" class="trim" compatibility="7.0.001" expanded="true" height="82" name="Trim" width="90" x="112" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="etim"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Gives the category attribute "etim" the role "label". -->
          <operator activated="true" class="set_role" compatibility="7.0.001" expanded="true" height="82" name="Set Role" width="90" x="246" y="85">
            <parameter key="attribute_name" value="etim"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Deactivated (activated="false") and not referenced by any <connect>,
               so it takes no part in this run. -->
          <operator activated="false" class="sample" compatibility="7.0.001" expanded="true" height="82" name="Sample" width="90" x="447" y="136">
            <parameter key="balance_data" value="true"/>
            <list key="sample_size_per_class">
              <parameter key="EC000993" value="5"/>
              <parameter key="EC000058" value="5"/>
            </list>
            <list key="sample_ratio_per_class"/>
            <list key="sample_probability_per_class"/>
          </operator>
          <!-- Checks whether the two category codes can be matched exactly after trimming:
               two "equals" conditions on "etim". filters_logic_and is false, so the
               conditions are presumably combined with OR; confirm in the operator docs. -->
          <operator activated="true" class="filter_examples" compatibility="7.0.001" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="238">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="etim.equals.EC000993"/>
              <parameter key="filters_entry_key" value="etim.equals.EC000058"/>
            </list>
            <parameter key="filters_logic_and" value="false"/>
          </operator>
          <connect from_op="Retrieve t123_product_import_23032016" from_port="output" to_op="Trim" to_port="example set input"/>
          <connect from_op="Trim" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
  • online360 Member Posts: 34 Contributor I
    Thanks for your input!

    Unfortunately, this only filters for the selected categories; it does not limit the result to 10 examples of each.

    What I'd like to do is to get 10 samples of each category (each begins with "EC"), so I tried using regex.
    Unfortunately, I don't get any results then. (maybe my regex is wrong?)

    I also added a few filters to only get those examples that contain a description and an image.
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!-- Follow-up process (version 7.0.001).
         Wiring per the <connect> elements below: Retrieve, then Trim, then Set Role,
         then Filter Examples, then Filter Examples (2), then Filter Examples (3),
         then Sample, then port "result 1". -->
    <process version="7.0.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
        <process expanded="true">
          <!-- Loads the product list; here the repository path starts with "//Cloud Repository/". -->
          <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve t123_product_import_23032016" width="90" x="45" y="85">
            <parameter key="repository_entry" value="//Cloud Repository/tech123_win/t123_product_import_23032016"/>
          </operator>
          <!-- Trim is restricted to the single attribute "etim"; special attributes are included. -->
          <operator activated="true" class="trim" compatibility="7.0.001" expanded="true" height="82" name="Trim" width="90" x="112" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="etim"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Gives the category attribute "etim" the role "label". -->
          <operator activated="true" class="set_role" compatibility="7.0.001" expanded="true" height="82" name="Set Role" width="90" x="246" y="85">
            <parameter key="attribute_name" value="etim"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Single condition: "etim" is not missing. -->
          <operator activated="true" class="filter_examples" compatibility="7.0.001" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="238">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="etim.is_not_missing."/>
            </list>
            <parameter key="filters_logic_and" value="false"/>
          </operator>
          <!-- Single condition: "image" contains the value "\.".
               NOTE(review): if "contains" is a plain substring test rather than a regular
               expression, this looks for a literal backslash followed by a dot and may
               match nothing; verify against the Filter Examples documentation. -->
          <operator activated="true" class="filter_examples" compatibility="7.0.001" expanded="true" height="103" name="Filter Examples (2)" width="90" x="581" y="238">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="image.contains.\."/>
            </list>
          </operator>
          <!-- Single condition: "description" is not missing. -->
          <operator activated="true" class="filter_examples" compatibility="7.0.001" expanded="true" height="103" name="Filter Examples (3)" width="90" x="715" y="238">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="description.is_not_missing."/>
            </list>
          </operator>
          <!-- balance_data is enabled with one per-class entry: key "EC.*", size 10.
               NOTE(review): the poster intends "EC.*" as a regular expression covering
               every category and reports getting no results. Whether this list accepts
               patterns or only exact class names is not visible in this file; confirm
               against the Sample operator documentation. -->
          <operator activated="true" class="sample" compatibility="7.0.001" expanded="true" height="82" name="Sample" width="90" x="916" y="238">
            <parameter key="balance_data" value="true"/>
            <list key="sample_size_per_class">
              <parameter key="EC.*" value="10"/>
            </list>
            <list key="sample_ratio_per_class"/>
            <list key="sample_probability_per_class"/>
          </operator>
          <connect from_op="Retrieve t123_product_import_23032016" from_port="output" to_op="Trim" to_port="example set input"/>
          <connect from_op="Trim" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
          <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Filter Examples (3)" to_port="example set input"/>
          <connect from_op="Filter Examples (3)" from_port="example set output" to_op="Sample" to_port="example set input"/>
          <connect from_op="Sample" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
Sign In or Register to comment.