Options

Modelling with a label that has 6 classes, and using 4 other polynominal attributes

18a641r18a641r Member Posts: 4 Contributor I
edited November 2018 in Help

Hi

 

I am new to RapidMiner and would like to find out whether there is any model that I could use for predicting a label with 6 classes.

 

I have a data set which has 5 fields,

1. Industry Type (label)

2. Cardname

3. Education Level

4. Gender

5. Marital Status

 

I would like to use attribute 2 - 5 to predict the industry type. I have attached the file for reference.

 

Thanks in advance for the assistance.

 

 

 

Tagged:

Answers

  • Options
    lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn

    Hi @18a641r,

     

    First, if you are new to RapidMiner, I encourage you to watch these training videos to learn the basics of RapidMiner.

    I played a little with your data, and I'm not able to find a relevant model (the best model has an accuracy of  ~16%).

    Looking at your data, all attributes are very "homogeneous" / "uniform":

    Simple_Process.png

    So I think no algorithm is able to find correlations between your 4 attributes and your label (IndustryType).

    You can test different models and, for a given model, play with its parameters to see how the performance of your model evolves:

    It's a good method to begin to learn RapidMiner.

    Finally, you can find here a basic process implementing a Decision Tree model : 

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- RapidMiner process: read the CSV, mark IndustryType as the label, and
         cross-validate a Decision Tree on the remaining four attributes. -->
    <process version="9.0.000-BETA">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.0.000-BETA" expanded="true" name="Process">
        <process expanded="true">
          <!-- Step 1: load the data set. The csv_file value is an absolute path on the
               author's machine; point it at your own copy of the file before running. -->
          <operator activated="true" class="read_csv" compatibility="9.0.000-BETA" expanded="true" height="68" name="Read CSV" width="90" x="45" y="85">
            <parameter key="csv_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Simple_Process\CCD_Table (Based on 4 Attributes).csv"/>
            <parameter key="column_separators" value=","/>
            <parameter key="skip_comments" value="true"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="annotations"/>
            <parameter key="encoding" value="windows-1252"/>
            <!-- All five columns are read as polynominal attributes; the label role is assigned later. -->
            <list key="data_set_meta_data_information">
              <parameter key="0" value="IndustryType.true.polynominal.attribute"/>
              <parameter key="1" value="CardName.true.polynominal.attribute"/>
              <parameter key="2" value="EducationLevel.true.polynominal.attribute"/>
              <parameter key="3" value="Gender.true.polynominal.attribute"/>
              <parameter key="4" value="MaritalStatus.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>

          <!-- Step 2: declare IndustryType as the label to predict. -->
          <operator activated="true" class="set_role" compatibility="9.0.000-BETA" expanded="true" height="82" name="Set Role" width="90" x="179" y="85">
            <parameter key="attribute_name" value="IndustryType"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>

          <!-- Step 3: cross validation. The first inner process trains the model,
               the second one applies it to the test set and measures performance. -->
          <operator activated="true" class="concurrency:cross_validation" compatibility="9.0.000-BETA" expanded="true" height="145" name="Cross Validation" width="90" x="313" y="85">
            <!-- Training side: Decision Tree with no parameters set in this process. -->
            <process expanded="true">
              <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="9.0.000-BETA" expanded="true" height="103" name="Decision Tree" width="90" x="179" y="34"/>
              <connect from_port="training set" to_op="Decision Tree" to_port="training set"/>
              <connect from_op="Decision Tree" from_port="model" to_port="model"/>
              <portSpacing port="source_training set" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <!-- Testing side: Apply Model followed by classification Performance. -->
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="9.0.000-BETA" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_classification" compatibility="9.0.000-BETA" expanded="true" height="82" name="Performance" width="90" x="246" y="34">
                <list key="class_weights"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
              <connect from_op="Performance" from_port="example set" to_port="test set results"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_test set results" spacing="0"/>
              <portSpacing port="sink_performance 1" spacing="0"/>
              <portSpacing port="sink_performance 2" spacing="0"/>
            </process>
          </operator>

          <!-- Wiring of the top-level operators; the example set, model, test results
               and performance of the cross validation go to result ports 1 to 4. -->
          <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Cross Validation" to_port="example set"/>
          <connect from_op="Cross Validation" from_port="model" to_port="result 2"/>
          <connect from_op="Cross Validation" from_port="example set" to_port="result 1"/>
          <connect from_op="Cross Validation" from_port="test result set" to_port="result 3"/>
          <connect from_op="Cross Validation" from_port="performance 1" to_port="result 4"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
          <portSpacing port="sink_result 4" spacing="0"/>
          <portSpacing port="sink_result 5" spacing="0"/>
        </process>
      </operator>
    </process>

    I hope it helps,

     

    Regards,

     

    Lionel

     

    NB: Sometimes you have to resign yourself: although Machine Learning is a powerful tool, it is helpless in the face of certain problems...

     

  • Options
    rfuentealbarfuentealba Moderator, RapidMiner Certified Analyst, Member, University Professor Posts: 568 Unicorn

    Hi @18a641r

     

    Like @lionelderkrikor, I took a look at your data, and it looks more like a cross join among 4 categories (Card Name, Education Level, Gender and Marital Status). A cross join between two entities gives you all the possible combinations of classes.

     

    Let's dive deeper (not as deep as my sensei Lionel, but enough to build an idea on how classification problems work): Given the following table, where AL is the label and (A1, A2) are the combinations:

     

    AL A1 A2

     0  0  0

     0  0  1

     0  1  0

     0  1  1

     1  0  0

     1  0  1

     1  1  0

     1  1  1

     

    If you apply a model (let's say, a Decision Tree, which is the easiest one to understand, and the first one you are presented with when you open the RapidMiner Titanic Tutorial), it will be only 50% confident that the label for any combination of A1 and A2 is 0, because every combination appears once with each label. On the other hand, consider the following table, where the label can be derived from the attributes:

     

    AL A1 A2

     1  0  0

     0  0  1

     0  1  0

     1  1  1

     1  0  0

     0  0  1

     0  1  0

     1  1  1

     

    (It's simple: if A1 and A2 are equal, the label is 1; else 0).

     

    Here you have an XML process for the latter, just to feed your curiosity. (Turns out I was preparing a class for tomorrow and was working with the same things.) You need an additional extension, the Operator Toolbox; I use it a lot to create example sets on the fly to test some things.

     

    Run the process, then try replacing something and see what happens.

     

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- RapidMiner process: build a small 8-row example set inline and train a
         Decision Tree on it, with AL as the label and A1, A2 as attributes.
         The first operator comes from the Operator Toolbox extension. -->
    <process version="9.0.000-BETA">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.0.000-BETA" expanded="true" name="Process">
        <process expanded="true">
          <!-- Step 1: create the example set from comma-separated text; rows are
               separated by line-feed character references inside input_csv_text. -->
          <operator activated="true" class="operator_toolbox:create_exampleset" compatibility="1.2.000" expanded="true" height="68" name="Create ExampleSet" width="90" x="112" y="34">
            <parameter key="generator_type" value="comma_separated_text"/>
            <list key="function_descriptions"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="input_csv_text" value="AL,A1,A2&#10;1,0,0&#10;0,0,1&#10;0,1,0&#10;1,1,1&#10;1,0,0&#10;0,0,1&#10;0,1,0&#10;1,1,1"/>
          </operator>

          <!-- Step 2: convert the numeric 0/1 columns to polynominal values. -->
          <operator activated="true" class="numerical_to_polynominal" compatibility="9.0.000-BETA" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="246" y="34"/>

          <!-- Step 3: declare AL as the label. -->
          <operator activated="true" class="set_role" compatibility="9.0.000-BETA" expanded="true" height="82" name="Set Role" width="90" x="380" y="34">
            <parameter key="attribute_name" value="AL"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>

          <!-- Step 4: train the Decision Tree with pruning and pre-pruning switched off
               (presumably so the tree is grown fully on this tiny data set; confirm). -->
          <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="9.0.000-BETA" expanded="true" height="103" name="Decision Tree" width="90" x="514" y="34">
            <parameter key="apply_pruning" value="false"/>
            <parameter key="apply_prepruning" value="false"/>
          </operator>

          <!-- Wiring: a single chain of operators; the trained model goes to result port 1. -->
          <connect from_op="Create ExampleSet" from_port="output" to_op="Numerical to Polynominal" to_port="example set input"/>
          <connect from_op="Numerical to Polynominal" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Decision Tree" to_port="training set"/>
          <connect from_op="Decision Tree" from_port="model" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

    Hope it helps!

     

Sign In or Register to comment.