Options

Polynomial Regression

josh321josh321 Member Posts: 9 Contributor II
edited November 2018 in Help
I don't think I understand this operator and I am unable to produce an acceptable result. This is a very basic sample of a process to test the operator.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.2.006) posted with the original question.
  Chain: Read Excel, then Work on Subset (attributes AQ and Y) containing
  Set Role and Polynomial Regression, then Apply Model in the outer process.
-->
<process version="5.2.006">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
    <process expanded="true" height="620" width="1004">
      <!-- Imports cell range A2:BU557 from the Excel file; first_row_as_names is false. -->
      <operator activated="true" class="read_excel" compatibility="5.2.006" expanded="true" height="60" name="Read Excel" width="90" x="45" y="75">
        <parameter key="excel_file" value="C:\Files\1.xls"/>
        <parameter key="imported_cell_range" value="A2:BU557"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations"/>
        <!--
          One metadata entry per column, keys 0 to 72 for columns A to BU.
          NOTE(review): each value looks like name.flag.value-type.role;
          confirm the exact field meaning against the Read Excel documentation.
        -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="A.true.numeric.attribute"/>
          <parameter key="1" value="B.true.numeric.attribute"/>
          <parameter key="2" value="C.true.numeric.attribute"/>
          <parameter key="3" value="D.true.numeric.attribute"/>
          <parameter key="4" value="E.true.numeric.attribute"/>
          <parameter key="5" value="F.true.numeric.attribute"/>
          <parameter key="6" value="G.true.numeric.attribute"/>
          <parameter key="7" value="H.true.numeric.attribute"/>
          <parameter key="8" value="I.true.numeric.attribute"/>
          <parameter key="9" value="J.true.numeric.attribute"/>
          <parameter key="10" value="K.true.numeric.attribute"/>
          <parameter key="11" value="L.true.numeric.attribute"/>
          <parameter key="12" value="M.true.numeric.attribute"/>
          <parameter key="13" value="N.true.real.attribute"/>
          <parameter key="14" value="O.true.numeric.attribute"/>
          <parameter key="15" value="P.true.numeric.attribute"/>
          <parameter key="16" value="Q.true.numeric.attribute"/>
          <parameter key="17" value="R.true.numeric.attribute"/>
          <parameter key="18" value="S.true.numeric.attribute"/>
          <parameter key="19" value="T.true.numeric.attribute"/>
          <parameter key="20" value="U.true.numeric.attribute"/>
          <parameter key="21" value="V.true.numeric.attribute"/>
          <parameter key="22" value="W.true.numeric.attribute"/>
          <parameter key="23" value="X.true.numeric.attribute"/>
          <parameter key="24" value="Y.true.numeric.attribute"/>
          <parameter key="25" value="Z.true.real.attribute"/>
          <parameter key="26" value="AA.true.numeric.attribute"/>
          <parameter key="27" value="AB.true.numeric.attribute"/>
          <parameter key="28" value="AC.true.numeric.attribute"/>
          <parameter key="29" value="AD.true.numeric.attribute"/>
          <parameter key="30" value="AE.true.numeric.attribute"/>
          <parameter key="31" value="AF.true.numeric.attribute"/>
          <parameter key="32" value="AG.true.numeric.attribute"/>
          <parameter key="33" value="AH.true.real.attribute"/>
          <parameter key="34" value="AI.true.real.attribute"/>
          <parameter key="35" value="AJ.true.real.attribute"/>
          <parameter key="36" value="AK.true.numeric.attribute"/>
          <parameter key="37" value="AL.true.numeric.attribute"/>
          <parameter key="38" value="AM.true.numeric.attribute"/>
          <parameter key="39" value="AN.true.numeric.attribute"/>
          <parameter key="40" value="AO.true.numeric.attribute"/>
          <parameter key="41" value="AP.true.numeric.attribute"/>
          <parameter key="42" value="AQ.true.numeric.attribute"/>
          <parameter key="43" value="AR.true.numeric.attribute"/>
          <parameter key="44" value="AS.true.numeric.attribute"/>
          <parameter key="45" value="AT.true.numeric.attribute"/>
          <parameter key="46" value="AU.true.numeric.attribute"/>
          <parameter key="47" value="AV.true.numeric.attribute"/>
          <parameter key="48" value="AW.true.integer.attribute"/>
          <parameter key="49" value="AX.true.real.attribute"/>
          <parameter key="50" value="AY.true.numeric.attribute"/>
          <parameter key="51" value="AZ.true.numeric.attribute"/>
          <parameter key="52" value="BA.true.binominal.attribute"/>
          <parameter key="53" value="BB.true.real.attribute"/>
          <parameter key="54" value="BC.true.real.attribute"/>
          <parameter key="55" value="BD.true.real.attribute"/>
          <parameter key="56" value="BE.true.real.attribute"/>
          <parameter key="57" value="BF.true.real.attribute"/>
          <parameter key="58" value="BG.true.real.attribute"/>
          <parameter key="59" value="BH.true.real.attribute"/>
          <parameter key="60" value="BI.true.real.attribute"/>
          <parameter key="61" value="BJ.true.real.attribute"/>
          <parameter key="62" value="BK.true.real.attribute"/>
          <parameter key="63" value="BL.true.integer.attribute"/>
          <parameter key="64" value="BM.true.real.attribute"/>
          <parameter key="65" value="BN.true.real.attribute"/>
          <parameter key="66" value="BO.true.real.attribute"/>
          <parameter key="67" value="BP.true.real.attribute"/>
          <parameter key="68" value="BQ.true.real.attribute"/>
          <parameter key="69" value="BR.true.real.attribute"/>
          <parameter key="70" value="BS.true.integer.attribute"/>
          <parameter key="71" value="BT.true.real.attribute"/>
          <parameter key="72" value="BU.true.integer.attribute"/>
        </list>
      </operator>
      <!-- Runs the nested process on the attribute subset AQ and Y only. -->
      <operator activated="true" class="work_on_subset" compatibility="5.2.006" expanded="true" height="94" name="Work on Subset" width="90" x="246" y="75">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="|AQ|Y"/>
        <process expanded="true" height="638" width="1022">
          <!-- Assigns the role "label" to attribute Y. -->
          <operator activated="true" class="set_role" compatibility="5.2.006" expanded="true" height="76" name="Set Role" width="90" x="112" y="30">
            <parameter key="name" value="Y"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Learner with max_degree 2 and a fixed local random seed of 500. -->
          <operator activated="true" class="polynomial_regression" compatibility="5.2.006" expanded="true" height="76" name="Polynomial Regression" width="90" x="313" y="30">
            <parameter key="max_degree" value="2"/>
            <parameter key="use_local_random_seed" value="true"/>
            <parameter key="local_random_seed" value="500"/>
          </operator>
          <connect from_port="exampleSet" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Polynomial Regression" to_port="training set"/>
          <!-- The trained model leaves the nested process via the "through 1" port. -->
          <connect from_op="Polynomial Regression" from_port="model" to_port="through 1"/>
          <connect from_op="Polynomial Regression" from_port="exampleSet" to_port="example set"/>
          <portSpacing port="source_exampleSet" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <portSpacing port="sink_through 2" spacing="0"/>
        </process>
      </operator>
      <!--
        Apply Model sits in the outer process: it takes the "example set" output
        of Work on Subset as unlabelled data and the model from "through 1".
        The first reply in this thread points to this placement as the problem:
        the model is applied to data with different attributes than it was
        trained on.
      -->
      <operator activated="true" class="apply_model" compatibility="5.2.006" expanded="true" height="76" name="Apply Model" width="90" x="447" y="75">
        <list key="application_parameters"/>
      </operator>
      <connect from_op="Read Excel" from_port="output" to_op="Work on Subset" to_port="example set"/>
      <connect from_op="Work on Subset" from_port="example set" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Work on Subset" from_port="through 1" to_op="Apply Model" to_port="model"/>
      <!-- Process results: the labelled data (result 1) and the model (result 2). -->
      <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
      <connect from_op="Apply Model" from_port="model" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
And this is the resulting plot.

image

I realize I could do this part in Excel or Scilab, but I figured RapidMiner must be capable and I must be doing something wrong. Any help would be greatly appreciated! :)

Answers

  • Options
    MariusHelfMariusHelf RapidMiner Certified Expert, Member Posts: 1,869 Unicorn
    You are applying the model on a dataset with different attributes than the training set, since Apply Model is outside of Work on Subset (which is kind of deprecated anyway). I tried the Polynomial Regression operator on random data, and it looks quite good. Please have a look at the attached process.

    Best, Marius
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Example process from the reply (version 5.2.006). Chain: Generate Data,
      Add Noise, Polynomial Regression, Apply Model, all in the top-level
      process, so the model is applied to the example set that the learner
      passes through.
    -->
    <process version="5.2.006">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
        <process expanded="true" height="658" width="567">
          <!-- Generated data with target function "sum" and a single attribute. -->
          <operator activated="true" class="generate_data" compatibility="5.2.006" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
            <parameter key="target_function" value="sum"/>
            <parameter key="number_of_attributes" value="1"/>
          </operator>
          <!-- Add Noise with an empty "noise" list; all other parameters are left unset here. -->
          <operator activated="true" class="add_noise" compatibility="5.2.006" expanded="true" height="94" name="Add Noise" width="90" x="179" y="30">
            <list key="noise"/>
          </operator>
          <!-- Same learner settings as in the question: max_degree 2, local random seed 500. -->
          <operator activated="true" class="polynomial_regression" compatibility="5.2.006" expanded="true" height="76" name="Polynomial Regression (2)" width="90" x="313" y="30">
            <parameter key="max_degree" value="2"/>
            <parameter key="use_local_random_seed" value="true"/>
            <parameter key="local_random_seed" value="500"/>
          </operator>
          <operator activated="true" class="apply_model" compatibility="5.2.006" expanded="true" height="76" name="Apply Model" width="90" x="447" y="30">
            <list key="application_parameters"/>
          </operator>
          <connect from_op="Generate Data" from_port="output" to_op="Add Noise" to_port="example set input"/>
          <connect from_op="Add Noise" from_port="example set output" to_op="Polynomial Regression (2)" to_port="training set"/>
          <!-- Both the model and the learner's exampleSet output feed Apply Model directly. -->
          <connect from_op="Polynomial Regression (2)" from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_op="Polynomial Regression (2)" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
          <!-- Process results: the labelled data (result 1) and the model (result 2). -->
          <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
          <connect from_op="Apply Model" from_port="model" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
  • Options
    josh321josh321 Member Posts: 9 Contributor II
    Hmm.. Your process works for me as well. Yet when I try to structure mine the same, I get the same results as in my first post.
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Second attempt by the original poster (version 5.2.006), with all
      operators in the top-level process. Chain: Read Excel, Select Attributes
      (AQ and Y), Set Role (Y as label), Polynomial Regression, Apply Model.
      The poster reports the same results as with the first process.
    -->
    <process version="5.2.006">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
        <process expanded="true" height="620" width="1004">
          <!-- Imports cell range A2:BU557 from the Excel file; first_row_as_names is false. -->
          <operator activated="true" class="read_excel" compatibility="5.2.006" expanded="true" height="60" name="Read Excel" width="90" x="45" y="75">
            <parameter key="excel_file" value="C:\Users\Josh\Desktop\ITADS\1.xls"/>
            <parameter key="imported_cell_range" value="A2:BU557"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations"/>
            <!--
              One metadata entry per column, keys 0 to 72 for columns A to BU.
              NOTE(review): each value looks like name.flag.value-type.role;
              confirm the exact field meaning against the Read Excel documentation.
            -->
            <list key="data_set_meta_data_information">
              <parameter key="0" value="A.true.numeric.attribute"/>
              <parameter key="1" value="B.true.numeric.attribute"/>
              <parameter key="2" value="C.true.numeric.attribute"/>
              <parameter key="3" value="D.true.numeric.attribute"/>
              <parameter key="4" value="E.true.numeric.attribute"/>
              <parameter key="5" value="F.true.numeric.attribute"/>
              <parameter key="6" value="G.true.numeric.attribute"/>
              <parameter key="7" value="H.true.numeric.attribute"/>
              <parameter key="8" value="I.true.numeric.attribute"/>
              <parameter key="9" value="J.true.numeric.attribute"/>
              <parameter key="10" value="K.true.numeric.attribute"/>
              <parameter key="11" value="L.true.numeric.attribute"/>
              <parameter key="12" value="M.true.numeric.attribute"/>
              <parameter key="13" value="N.true.real.attribute"/>
              <parameter key="14" value="O.true.numeric.attribute"/>
              <parameter key="15" value="P.true.numeric.attribute"/>
              <parameter key="16" value="Q.true.numeric.attribute"/>
              <parameter key="17" value="R.true.numeric.attribute"/>
              <parameter key="18" value="S.true.numeric.attribute"/>
              <parameter key="19" value="T.true.numeric.attribute"/>
              <parameter key="20" value="U.true.numeric.attribute"/>
              <parameter key="21" value="V.true.numeric.attribute"/>
              <parameter key="22" value="W.true.numeric.attribute"/>
              <parameter key="23" value="X.true.numeric.attribute"/>
              <parameter key="24" value="Y.true.numeric.attribute"/>
              <parameter key="25" value="Z.true.real.attribute"/>
              <parameter key="26" value="AA.true.numeric.attribute"/>
              <parameter key="27" value="AB.true.numeric.attribute"/>
              <parameter key="28" value="AC.true.numeric.attribute"/>
              <parameter key="29" value="AD.true.numeric.attribute"/>
              <parameter key="30" value="AE.true.numeric.attribute"/>
              <parameter key="31" value="AF.true.numeric.attribute"/>
              <parameter key="32" value="AG.true.numeric.attribute"/>
              <parameter key="33" value="AH.true.real.attribute"/>
              <parameter key="34" value="AI.true.real.attribute"/>
              <parameter key="35" value="AJ.true.real.attribute"/>
              <parameter key="36" value="AK.true.numeric.attribute"/>
              <parameter key="37" value="AL.true.numeric.attribute"/>
              <parameter key="38" value="AM.true.numeric.attribute"/>
              <parameter key="39" value="AN.true.numeric.attribute"/>
              <parameter key="40" value="AO.true.numeric.attribute"/>
              <parameter key="41" value="AP.true.numeric.attribute"/>
              <parameter key="42" value="AQ.true.numeric.attribute"/>
              <parameter key="43" value="AR.true.numeric.attribute"/>
              <parameter key="44" value="AS.true.numeric.attribute"/>
              <parameter key="45" value="AT.true.numeric.attribute"/>
              <parameter key="46" value="AU.true.numeric.attribute"/>
              <parameter key="47" value="AV.true.numeric.attribute"/>
              <parameter key="48" value="AW.true.integer.attribute"/>
              <parameter key="49" value="AX.true.real.attribute"/>
              <parameter key="50" value="AY.true.numeric.attribute"/>
              <parameter key="51" value="AZ.true.numeric.attribute"/>
              <parameter key="52" value="BA.true.binominal.attribute"/>
              <parameter key="53" value="BB.true.real.attribute"/>
              <parameter key="54" value="BC.true.real.attribute"/>
              <parameter key="55" value="BD.true.real.attribute"/>
              <parameter key="56" value="BE.true.real.attribute"/>
              <parameter key="57" value="BF.true.real.attribute"/>
              <parameter key="58" value="BG.true.real.attribute"/>
              <parameter key="59" value="BH.true.real.attribute"/>
              <parameter key="60" value="BI.true.real.attribute"/>
              <parameter key="61" value="BJ.true.real.attribute"/>
              <parameter key="62" value="BK.true.real.attribute"/>
              <parameter key="63" value="BL.true.integer.attribute"/>
              <parameter key="64" value="BM.true.real.attribute"/>
              <parameter key="65" value="BN.true.real.attribute"/>
              <parameter key="66" value="BO.true.real.attribute"/>
              <parameter key="67" value="BP.true.real.attribute"/>
              <parameter key="68" value="BQ.true.real.attribute"/>
              <parameter key="69" value="BR.true.real.attribute"/>
              <parameter key="70" value="BS.true.integer.attribute"/>
              <parameter key="71" value="BT.true.real.attribute"/>
              <parameter key="72" value="BU.true.integer.attribute"/>
            </list>
          </operator>
          <!-- Attribute filter of type "subset" naming AQ and Y. -->
          <operator activated="true" class="select_attributes" compatibility="5.2.006" expanded="true" height="76" name="Select Attributes" width="90" x="246" y="75">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="|AQ|Y"/>
          </operator>
          <!-- Assigns the role "label" to attribute Y. -->
          <operator activated="true" class="set_role" compatibility="5.2.006" expanded="true" height="76" name="Set Role" width="90" x="380" y="75">
            <parameter key="name" value="Y"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Learner with max_degree 2 and a fixed local random seed of 500. -->
          <operator activated="true" class="polynomial_regression" compatibility="5.2.006" expanded="true" height="76" name="Polynomial Regression" width="90" x="514" y="75">
            <parameter key="max_degree" value="2"/>
            <parameter key="use_local_random_seed" value="true"/>
            <parameter key="local_random_seed" value="500"/>
          </operator>
          <operator activated="true" class="apply_model" compatibility="5.2.006" expanded="true" height="76" name="Apply Model" width="90" x="648" y="75">
            <list key="application_parameters"/>
          </operator>
          <connect from_op="Read Excel" from_port="output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Polynomial Regression" to_port="training set"/>
          <!-- Both the model and the learner's exampleSet output feed Apply Model directly. -->
          <connect from_op="Polynomial Regression" from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_op="Polynomial Regression" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
          <!-- Process results: the labelled data (result 1) and the model (result 2). -->
          <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
          <connect from_op="Apply Model" from_port="model" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
    Also, going on a bit of a tangent.. If I modify your process as below, to include more than one attribute and work on a subset of att1 and label, I get an error saying that no label attribute is found.
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Fragment as posted: a single polynomial_regression operator placed directly
      under the process element, with no root "process" operator, no data source
      and no connections. The following reply in this thread says the posted
      process is not valid and suspects a copy-paste problem.
    -->
    <process version="5.2.006">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="polynomial_regression" compatibility="5.2.006" expanded="true" height="76" name="Polynomial Regression (2)" width="90" x="313" y="30">
        <!-- Unlike the other processes in this thread, the iteration, replication and coefficient-bound parameters are spelled out here. -->
        <parameter key="max_iterations" value="5000"/>
        <parameter key="replication_factor" value="1"/>
        <parameter key="max_degree" value="2"/>
        <parameter key="min_coefficient" value="-100.0"/>
        <parameter key="max_coefficient" value="100.0"/>
        <parameter key="use_local_random_seed" value="true"/>
        <parameter key="local_random_seed" value="500"/>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Fragment as posted: a single apply_model operator placed directly under
      the process element, with no root "process" operator and no connections.
      The following reply in this thread says the posted process is not valid
      and suspects a copy-paste problem.
    -->
    <process version="5.2.006">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="apply_model" compatibility="5.2.006" expanded="true" height="76" name="Apply Model" width="90" x="447" y="30">
        <list key="application_parameters"/>
        <parameter key="create_view" value="false"/>
      </operator>
    </process>
  • Options
    MariusHelfMariusHelf RapidMiner Certified Expert, Member Posts: 1,869 Unicorn
    Hm.. honestly I have no idea why you get such bad results. In your special use case you have linear data anyway, so probably a linear regression would work better here. Maybe you could also try higher values for max iterations.

    The second process you posted is not valid, probably something went wrong with copy-paste :)

    Best, Marius
Sign In or Register to comment.