image

🎉 🎉 RAPIDMINER 9.10 IS OUT!!! 🎉🎉

Download the latest version helping analytics teams accelerate time-to-value for streaming and IIOT use cases.

CLICK HERE TO DOWNLOAD

"W-M5P, errors in results"

ArmenArmen Member Posts: 7 Contributor II
edited May 2019 in Help
I am continuing to evaluate RapidMiner v5.0... ;)

Today I was trying to work on a weka node: M5P.
I am trying to create one model tree per value of "DistrictName" by looping over its values.

The resulting trees are interesting, but the linear models have some problems.
All of them are in this format:

LM num: 1
Price_2009 =
+ (value1)


where the constant value is different for each LM and each DistrictName.

Do you know why it does not work?

Thank you, Armen


<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Per-district M5P evaluation process.

  Outer flow: Retrieve data2, keep a fixed attribute subset, convert House and
  Flat to binominal, then loop over every value of DistrictName.
  Inner flow (per district): split rows by the "dataset" attribute into train
  and test, learn a W-M5P model on the train rows, apply it to the test rows
  and measure performance.

  Results: result 1 = performance, result 2 = model, result 3 = appended
  scored example sets.

  NOTE(review): "Set Role (2)" (the only operator that would mark Price_2009
  as label) is deactivated, so the label role presumably comes from the stored
  repository entry. Confirm against the data.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" expanded="true" name="Root">
    <process expanded="true" height="539" width="1016">
      <operator activated="true" class="retrieve" expanded="true" height="60" name="Retrieve" width="90" x="45" y="120">
        <parameter key="repository_entry" value="//NewLocalRepository/data2"/>
      </operator>
      <operator activated="true" class="select_attributes" expanded="true" height="76" name="Select Attributes (2)" width="90" x="179" y="120">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="ID|Price_2009|DistrictName|Flat|House|House_size_tot|House_size|House_ground|Basement_size|Year|WE|SN|dataset"/>
      </operator>
      <operator activated="true" class="numerical_to_binominal" expanded="true" height="76" name="Numerical to Binominal" width="90" x="313" y="120">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="House|Flat"/>
      </operator>
      <!-- The next three operators are deactivated and not referenced by any
           connect element below; they are leftovers from an earlier variant
           that discretized Price_2009 into 10 bins. -->
      <operator activated="false" class="set_role" expanded="true" height="76" name="Set Role" width="90" x="380" y="300">
        <parameter key="name" value="Price_2009"/>
      </operator>
      <operator activated="false" class="discretize_by_bins" expanded="true" height="94" name="Discretize" width="90" x="514" y="300">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Price_2009"/>
        <parameter key="attributes" value="Price_2009"/>
        <parameter key="number_of_bins" value="10"/>
      </operator>
      <operator activated="false" class="set_role" expanded="true" height="76" name="Set Role (2)" width="90" x="648" y="300">
        <parameter key="name" value="Price_2009"/>
        <parameter key="target_role" value="label"/>
      </operator>
      <!-- Runs the inner process once per distinct DistrictName value; the
           current value is referenced inside as %{loop_value}. -->
      <operator activated="true" class="loop_values" expanded="true" height="112" name="ValueIterator" width="90" x="715" y="120">
        <parameter key="attribute" value="DistrictName"/>
        <process expanded="true" height="484" width="1016">
          <!-- Restrict to the rows of the district handled in this iteration. -->
          <operator activated="true" class="filter_examples" expanded="true" height="76" name="ExampleFilter" width="90" x="45" y="120">
            <parameter key="condition_class" value="attribute_value_filter"/>
            <parameter key="parameter_string" value="DistrictName = %{loop_value}"/>
          </operator>
          <!-- Training rows of this district. The "original" port of this
               operator also feeds the test filter below. -->
          <operator activated="true" class="filter_examples" expanded="true" height="76" name="Filter Examples" width="90" x="179" y="120">
            <parameter key="condition_class" value="attribute_value_filter"/>
            <parameter key="parameter_string" value="dataset = train"/>
          </operator>
          <!-- Test rows of this district. These go straight to Apply Model and
               are NOT passed through "Select Attributes", so they keep every
               attribute of the district subset. -->
          <operator activated="true" class="filter_examples" expanded="true" height="76" name="Filter Examples (2)" width="90" x="447" y="210">
            <parameter key="condition_class" value="attribute_value_filter"/>
            <parameter key="parameter_string" value="dataset = test"/>
          </operator>
          <!-- Attributes offered to the learner (training rows only). -->
          <operator activated="true" class="select_attributes" expanded="true" height="76" name="Select Attributes" width="90" x="380" y="75">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="Year|House_size_tot|House_size|Basement_size|House_ground|SN|WE|Price_2009|ID"/>
          </operator>
          <!-- R must be false to obtain a model tree. Per the Weka M5P option
               list, R = "build regression tree rather than a model tree";
               with R enabled every leaf is a bare constant
               ("Price_2009 = + c"), with no attribute coefficients.
               L (per the same option list: save instances at the nodes, used
               for visualization) is left as it was. -->
          <operator activated="true" class="W-M5P" expanded="true" height="76" name="W-M5P" width="90" x="581" y="30">
            <parameter key="R" value="false"/>
            <parameter key="L" value="true"/>
          </operator>
          <operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model (XY)" width="90" x="715" y="120">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" expanded="true" height="76" name="Performance (2)" width="90" x="916" y="30"/>
          <!-- Inner wiring: out 1 = performance, out 2 = scored test rows,
               out 3 = the model passed through Apply Model. -->
          <connect from_port="example set" to_op="ExampleFilter" to_port="example set input"/>
          <connect from_op="ExampleFilter" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="original" to_op="Filter Examples (2)" to_port="example set input"/>
          <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Apply Model (XY)" to_port="unlabelled data"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="W-M5P" to_port="training set"/>
          <connect from_op="W-M5P" from_port="model" to_op="Apply Model (XY)" to_port="model"/>
          <connect from_op="Apply Model (XY)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
          <connect from_op="Apply Model (XY)" from_port="model" to_port="out 3"/>
          <connect from_op="Performance (2)" from_port="performance" to_port="out 1"/>
          <connect from_op="Performance (2)" from_port="example set" to_port="out 2"/>
          <portSpacing port="source_example set" spacing="90"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
          <portSpacing port="sink_out 3" spacing="72"/>
          <portSpacing port="sink_out 4" spacing="54"/>
        </process>
      </operator>
      <operator activated="true" class="append" expanded="true" height="76" name="ExampleSetMerge (XY)" width="90" x="916" y="120"/>
      <!-- Outer wiring: the loop's out 2 is routed through the append
           operator before being delivered as result 3. -->
      <connect from_op="Retrieve" from_port="output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Numerical to Binominal" to_port="example set input"/>
      <connect from_op="Numerical to Binominal" from_port="example set output" to_op="ValueIterator" to_port="example set"/>
      <connect from_op="ValueIterator" from_port="out 1" to_port="result 1"/>
      <connect from_op="ValueIterator" from_port="out 2" to_op="ExampleSetMerge (XY)" to_port="example set 1"/>
      <connect from_op="ValueIterator" from_port="out 3" to_port="result 2"/>
      <connect from_op="ExampleSetMerge (XY)" from_port="merged set" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="18"/>
      <portSpacing port="sink_result 2" spacing="126"/>
      <portSpacing port="sink_result 3" spacing="90"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:

Answers

  • landland RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 2,531   Unicorn
    Hi,
    don't know. I'm not familiar with Weka learners and their internals. You could ask on the weka mailing list, but I doubt they will be too happy seeing a RapidMiner process ;)

    Greetings,
      Sebastian
  • ArmenArmen Member Posts: 7 Contributor II
    Hi Sebastian,
    The problem is not in the Weka node (just tried on Weka and it works).
    So I guess that something goes wrong in the RapidMiner node...it looks like some lines are cut.

    The linear model should be in this form:

    Price_2009 =
    -701.4692 * PostCodeIdentifier
    - 12537.1871 * House_type
    + 974.1074 * House
    + 235.7531



    ...but the results look like this:

    Price_2009 =
    + 235.7531



    If I am not able to use Weka M5P, is there any RapidMiner algorithm that can create a model tree?

    Thank you!
    David
  • ArmenArmen Member Posts: 7 Contributor II
    I fixed the problem: the test set had some data problems.
    I cleaned the data and the model works on the test set.

    Thank you anyway :)

    David


    ps. I would still be interested in knowing if there is any other node to create a Model Tree!
Sign In or Register to comment.