The Altair Community and the RapidMiner community are in read-only mode until further notice. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here.
Options

solved

danzgbdanzgb Member Posts: 7 Contributor II
edited November 2018 in Help

 

 

Solved. 

Best Answer

  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,525 RM Data Scientist
    Solution Accepted

    danzgb,

     

    you can simply copy the xml code and hit ctrl+v to put it into RM. I do not get the error so this is a bit confusing.

     

    On the decision tree: decision trees are one of the standard ways to do classification. In fact, this is only a default comment. Since I treated the label as numerical (not really needed), I used an SVM in regression mode. That's why I did not need to say 10-13 is enough. I simply try to predict the actual number.

     

    ~Martin

    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany

Answers

  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,525 RM Data Scientist

    Hi danzgb,

     

    have you tried something like this? I might have done something wrong, but this process gives an absolute error of 1.8. Which sounds fine?

     

    ~Martin

     

    Spoiler
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner process: cross-validated SVM that predicts the grade attribute G3
      of the student-por.csv data set and reports regression performance.

      Pipeline: Read CSV, Set Role (G3 as label), Nominal to Numerical,
      Validation (training: SVM; testing: Apply Model, Performance).

      NOTE: the csv_file parameter of Read CSV is an absolute path on the
      author's machine; point it at your own copy of student-por.csv before
      running the process.
    -->
    <process version="7.1.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.1.000" expanded="true" name="Process">
        <process expanded="true">
          <!-- Load the CSV. Row 0 is annotated as "Name" (presumably the header row;
               confirm against the file). The meta data list declares 33 columns
               (indices 0 to 32) with their value types. -->
          <operator activated="true" class="read_csv" compatibility="7.1.000" expanded="true" height="68" name="Read CSV" width="90" x="112" y="34">
            <parameter key="csv_file" value="/Users/mschmitz/CODING/RM/student/student-por.csv"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <parameter key="encoding" value="UTF-8"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="school.true.polynominal.attribute"/>
              <parameter key="1" value="sex.true.polynominal.attribute"/>
              <parameter key="2" value="age.true.integer.attribute"/>
              <parameter key="3" value="address.true.polynominal.attribute"/>
              <parameter key="4" value="famsize.true.polynominal.attribute"/>
              <parameter key="5" value="Pstatus.true.polynominal.attribute"/>
              <parameter key="6" value="Medu.true.integer.attribute"/>
              <parameter key="7" value="Fedu.true.integer.attribute"/>
              <parameter key="8" value="Mjob.true.polynominal.attribute"/>
              <parameter key="9" value="Fjob.true.polynominal.attribute"/>
              <parameter key="10" value="reason.true.polynominal.attribute"/>
              <parameter key="11" value="guardian.true.polynominal.attribute"/>
              <parameter key="12" value="traveltime.true.integer.attribute"/>
              <parameter key="13" value="studytime.true.integer.attribute"/>
              <parameter key="14" value="failures.true.integer.attribute"/>
              <parameter key="15" value="schoolsup.true.polynominal.attribute"/>
              <parameter key="16" value="famsup.true.polynominal.attribute"/>
              <parameter key="17" value="paid.true.polynominal.attribute"/>
              <parameter key="18" value="activities.true.polynominal.attribute"/>
              <parameter key="19" value="nursery.true.polynominal.attribute"/>
              <parameter key="20" value="higher.true.polynominal.attribute"/>
              <parameter key="21" value="internet.true.polynominal.attribute"/>
              <parameter key="22" value="romantic.true.polynominal.attribute"/>
              <parameter key="23" value="famrel.true.integer.attribute"/>
              <parameter key="24" value="freetime.true.integer.attribute"/>
              <parameter key="25" value="goout.true.integer.attribute"/>
              <parameter key="26" value="Dalc.true.integer.attribute"/>
              <parameter key="27" value="Walc.true.integer.attribute"/>
              <parameter key="28" value="health.true.integer.attribute"/>
              <parameter key="29" value="absences.true.integer.attribute"/>
              <parameter key="30" value="G1.true.integer.attribute"/>
              <parameter key="31" value="G2.true.integer.attribute"/>
              <parameter key="32" value="G3.true.integer.attribute"/>
            </list>
          </operator>
          <!-- Mark G3 as the label (prediction target). -->
          <operator activated="true" class="set_role" compatibility="7.1.000" expanded="true" height="82" name="Set Role" width="90" x="246" y="34">
            <parameter key="attribute_name" value="G3"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Nominal to Numerical with default settings; presumably needed because
               the SVM learner expects numerical attributes (confirm in the operator docs). -->
          <operator activated="true" class="nominal_to_numerical" compatibility="7.1.000" expanded="true" height="103" name="Nominal to Numerical" width="90" x="447" y="34">
            <list key="comparison_groups"/>
          </operator>
          <!-- Cross-validation. sampling_type is set to "2"; the meaning of that code
               is not visible here (check the X-Validation operator documentation). -->
          <operator activated="true" class="x_validation" compatibility="5.0.000" expanded="true" height="124" name="Validation" width="90" x="648" y="34">
            <parameter key="sampling_type" value="2"/>
            <!-- Training subprocess: an SVM with no explicit parameters set. -->
            <process expanded="true">
              <operator activated="true" class="support_vector_machine" compatibility="7.1.000" expanded="true" height="124" name="SVM" width="90" x="45" y="34"/>
              <connect from_port="training" to_op="SVM" to_port="training set"/>
              <connect from_op="SVM" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <!-- Testing subprocess: apply the model to the test set and compute
                 regression performance (absolute error and squared correlation enabled). -->
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="5.0.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_regression" compatibility="7.1.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
                <parameter key="absolute_error" value="true"/>
                <parameter key="squared_correlation" value="true"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
            <!-- The annotation below originally carried the default text
                 "A cross-validation evaluating a decision tree model.", which did not
                 match the SVM learner used in the training subprocess. -->
            <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating an SVM model.</description>
          </operator>
          <!-- Wiring: data flows left to right; the model goes to result 1 and the
               averaged performance to result 2. -->
          <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Numerical" to_port="example set input"/>
          <connect from_op="Nominal to Numerical" from_port="example set output" to_op="Validation" to_port="training"/>
          <connect from_op="Validation" from_port="model" to_port="result 1"/>
          <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
  • Options
    danzgbdanzgb Member Posts: 7 Contributor II

    First of all thanks a lot for your reply @mschmitz.

    I'm a real newbie in RapidMiner. Where should I paste the code you gave me? In the "XML view"? I can't find where the XML view is...

     

    Thanks one more time.

  • Options
    danzgbdanzgb Member Posts: 7 Contributor II

    Ok
    !

  • Options
    danzgbdanzgb Member Posts: 7 Contributor II

    .
     

  • Options
    danzgbdanzgb Member Posts: 7 Contributor II


    Thanks again Martin, you're really helping me out!

  • Options
    danzgbdanzgb Member Posts: 7 Contributor II

    .

  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,525 RM Data Scientist

    Hi danzg,

     

    The key operator is Aggregate. You can build all of those values with a combination of Generate Attributes, Aggregate, and Filter Examples. I built the process for the number of people below 10 and attached it.

     

    Edit: Oops, the processes are obviously only counting how often you have them in the data. To forecast, you would simply take < 10 as a label and learn a model to predict it. You can then apply the model to the new data and use Aggregate to count on the predictions.

     

    Best,

    Martin

     

    Spoiler
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner process: counts how many examples in student-por.csv have a
      G3 value below 10 and how many do not.

      Pipeline: Read CSV, Set Role (G3 as label), Generate Attributes
      (flag "G3 Below 10"), Aggregate (count per flag value).

      NOTE: csv_file is an absolute path on the author's machine; adjust it
      before running the process.
    -->
    <process version="7.1.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.1.001" expanded="true" name="Process">
        <process expanded="true">
          <!-- Load the CSV. Row 0 is annotated as "Name"; the meta data list
               declares 33 columns (indices 0 to 32) with their value types. -->
          <operator activated="true" class="read_csv" compatibility="7.1.001" expanded="true" height="68" name="Read CSV" width="90" x="112" y="34">
            <parameter key="csv_file" value="C:\Users\Martin\Arbeit\Forum/student-por.csv"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <parameter key="encoding" value="UTF-8"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="school.true.polynominal.attribute"/>
              <parameter key="1" value="sex.true.polynominal.attribute"/>
              <parameter key="2" value="age.true.integer.attribute"/>
              <parameter key="3" value="address.true.polynominal.attribute"/>
              <parameter key="4" value="famsize.true.polynominal.attribute"/>
              <parameter key="5" value="Pstatus.true.polynominal.attribute"/>
              <parameter key="6" value="Medu.true.integer.attribute"/>
              <parameter key="7" value="Fedu.true.integer.attribute"/>
              <parameter key="8" value="Mjob.true.polynominal.attribute"/>
              <parameter key="9" value="Fjob.true.polynominal.attribute"/>
              <parameter key="10" value="reason.true.polynominal.attribute"/>
              <parameter key="11" value="guardian.true.polynominal.attribute"/>
              <parameter key="12" value="traveltime.true.integer.attribute"/>
              <parameter key="13" value="studytime.true.integer.attribute"/>
              <parameter key="14" value="failures.true.integer.attribute"/>
              <parameter key="15" value="schoolsup.true.polynominal.attribute"/>
              <parameter key="16" value="famsup.true.polynominal.attribute"/>
              <parameter key="17" value="paid.true.polynominal.attribute"/>
              <parameter key="18" value="activities.true.polynominal.attribute"/>
              <parameter key="19" value="nursery.true.polynominal.attribute"/>
              <parameter key="20" value="higher.true.polynominal.attribute"/>
              <parameter key="21" value="internet.true.polynominal.attribute"/>
              <parameter key="22" value="romantic.true.polynominal.attribute"/>
              <parameter key="23" value="famrel.true.integer.attribute"/>
              <parameter key="24" value="freetime.true.integer.attribute"/>
              <parameter key="25" value="goout.true.integer.attribute"/>
              <parameter key="26" value="Dalc.true.integer.attribute"/>
              <parameter key="27" value="Walc.true.integer.attribute"/>
              <parameter key="28" value="health.true.integer.attribute"/>
              <parameter key="29" value="absences.true.integer.attribute"/>
              <parameter key="30" value="G1.true.integer.attribute"/>
              <parameter key="31" value="G2.true.integer.attribute"/>
              <parameter key="32" value="G3.true.integer.attribute"/>
            </list>
          </operator>
          <!-- Mark G3 as the label. -->
          <operator activated="true" class="set_role" compatibility="7.1.001" expanded="true" height="82" name="Set Role" width="90" x="246" y="34">
            <parameter key="attribute_name" value="G3"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Derive the attribute "G3 Below 10" from the expression G3 < 10
               (the "<" is escaped as an entity inside the attribute value). -->
          <operator activated="true" class="generate_attributes" compatibility="7.1.001" expanded="true" height="82" name="Generate Attributes" width="90" x="380" y="34">
            <list key="function_descriptions">
              <parameter key="G3 Below 10" value="G3&lt;10"/>
            </list>
          </operator>
          <!-- Count examples per value of "G3 Below 10": the flag is both the
               aggregated attribute and the group-by attribute. -->
          <operator activated="true" class="aggregate" compatibility="7.1.001" expanded="true" height="82" name="Aggregate" width="90" x="514" y="34">
            <list key="aggregation_attributes">
              <parameter key="G3 Below 10" value="count"/>
            </list>
            <parameter key="group_by_attributes" value="G3 Below 10"/>
          </operator>
          <!-- Wiring: a single linear chain ending at result 1. -->
          <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

     

     

     

    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
Sign In or Register to comment.