Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email address that you previously used to log in to the RapidMiner community. This will ensure that any previously created content is synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
"LibSVM classification: X-validation evaluation, Normalization"
aryan_hosseinza
Member Posts: 74 Contributor II
Hi everybody ,
I have a dataset with 12 features for its instances (11 regular features : integers, 1 label feature : nominal)
It's a very imbalanced dataset, I want to use libSVM as a one-class classifier,
Firstly,
As you can see in the code below, I want to use X-Validation to train a one-class classifier on the minority-class instances of the training portion (90%) of the dataset, and I want to apply this model to the rest of the data (10%) in each validation fold,
but I can't use the Binominal Classification Performance evaluator, as the label is nominal! If I change it to binominal, I won't be able to train the SVM classifier! What should I do?
Secondly,
Generally, what preprocessing do you think is necessary before using a libSVM classifier? I read somewhere in the forum that normalization is necessary:
"Is there something like a "NormalizationModel" to get the same normalized values for training and test examples?
This should be possible by applying the operator Normalization with enabling the parameter return_preprocessing_model on the training data. This means, a preprocessing model is generated by normalizing the training which you can subsequently apply on the test data as well. The normalization is then done on the test data via the same transformation as applied on the training data. Therefore you have to apply this model using the ModelApplier."
but I didn't understand what to do. I would be thankful if you could help me.
And finally ,
I heard that one-class classifiers don't work very well compared to other classifiers; is that true?
Thanks ,
Arian
I have a dataset with 12 features for its instances (11 regular features : integers, 1 label feature : nominal)
It's a very imbalanced dataset, I want to use libSVM as a one-class classifier,
Firstly,
As you can see in the code below, I want to use X-Validation to train a one-class classifier on the minority-class instances of the training portion (90%) of the dataset, and I want to apply this model to the rest of the data (10%) in each validation fold,
but I can't use the Binominal Classification Performance evaluator, as the label is nominal! If I change it to binominal, I won't be able to train the SVM classifier! What should I do?
Secondly,
Generally, what preprocessing do you think is necessary before using a libSVM classifier? I read somewhere in the forum that normalization is necessary:
"Is there something like a "NormalizationModel" to get the same normalized values for training and test examples?
This should be possible by applying the operator Normalization with enabling the parameter return_preprocessing_model on the training data. This means, a preprocessing model is generated by normalizing the training which you can subsequently apply on the test data as well. The normalization is then done on the test data via the same transformation as applied on the training data. Therefore you have to apply this model using the ModelApplier."
but I didn't understand what to do. I would be thankful if you could help me.
And finally ,
I heard that one-class classifiers don't work very well compared to other classifiers; is that true?
Thanks ,
Arian
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process definition (process version 5.2.008).
  Top-level chain: "Retrieve (2)" feeds "Set Role (3)", which feeds the
  "Validation" operator; the first averagable output of "Validation" is
  delivered to the process port "result 1".
-->
<process version="5.2.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <process expanded="true" height="719" width="1485">

      <!-- Load the data stored under the repository entry "test1". -->
      <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve (2)" width="90" x="849" y="30">
        <parameter key="repository_entry" value="test1"/>
      </operator>

      <!-- Give the attribute "event" the role "label". -->
      <operator activated="true" class="set_role" compatibility="5.2.008" expanded="true" height="76" name="Set Role (3)" width="90" x="983" y="30">
        <parameter key="name" value="event"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>

      <!--
        Cross-validation (class x_validation) with two inner subprocesses:
        the first receives the "training" port and must deliver a "model",
        the second receives that "model" plus the "test set" and delivers
        the performance on "averagable 1".
      -->
      <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation" width="90" x="1117" y="30">

        <!-- Subprocess 1 (training side). -->
        <process expanded="true" height="677" width="494">
          <!-- Attribute value filter with the condition "event=t", applied before the learner sees the data. -->
          <operator activated="true" class="filter_examples" compatibility="5.2.008" expanded="true" height="76" name="Filter Examples (2)" width="90" x="112" y="30">
            <parameter key="condition_class" value="attribute_value_filter"/>
            <parameter key="parameter_string" value="event=t"/>
          </operator>
          <!-- LibSVM learner configured as a one-class SVM with a linear kernel; no class weights are set. -->
          <operator activated="true" class="support_vector_machine_libsvm" compatibility="5.2.008" expanded="true" height="76" name="SVM" width="90" x="246" y="30">
            <parameter key="svm_type" value="one-class"/>
            <parameter key="kernel_type" value="linear"/>
            <list key="class_weights"/>
          </operator>
          <!-- Wiring: training port, then the filter, then the SVM, then the model output port. -->
          <connect from_port="training" to_op="Filter Examples (2)" to_port="example set input"/>
          <connect from_op="Filter Examples (2)" from_port="example set output" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>

        <!-- Subprocess 2 (testing side). -->
        <process expanded="true" height="677" width="494">
          <!-- Apply the model from subprocess 1 to the test set; the test set is not filtered here. -->
          <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model" width="90" x="112" y="30">
            <list key="application_parameters"/>
          </operator>
          <!--
            Binominal classification performance: AUC is the main criterion,
            accuracy is switched off, and AUC, f_measure and the four
            confusion-matrix counts are switched on.
            NOTE(review): the author of this process reports that this
            operator cannot be used because the label is nominal rather
            than binominal. Presumably the label and the one-class
            predictions need to be brought to a common two-valued form
            before this step; confirm against the operator documentation.
          -->
          <operator activated="true" class="performance_binominal_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (2)" width="90" x="246" y="30">
            <parameter key="main_criterion" value="AUC"/>
            <parameter key="accuracy" value="false"/>
            <parameter key="AUC" value="true"/>
            <parameter key="f_measure" value="true"/>
            <parameter key="false_positive" value="true"/>
            <parameter key="false_negative" value="true"/>
            <parameter key="true_positive" value="true"/>
            <parameter key="true_negative" value="true"/>
          </operator>
          <!-- Wiring: model and test set into Apply Model, labelled data into Performance (2), performance out on "averagable 1". -->
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
          <connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>

      <!-- Top-level wiring of the three operators and the result port. -->
      <connect from_op="Retrieve (2)" from_port="output" to_op="Set Role (3)" to_port="example set input"/>
      <connect from_op="Set Role (3)" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:
0