Options

One-class SVM for text classfication

In777In777 Member Posts: 29 Contributor II
edited November 2018 in Help

I classify several examples (each example is a small text) using the one-class SVM. I got a problem that I cannot fix "Needs a nominal label with 2 or more values".  I tried to set role for the label, to change the label to numerical, to use meta "Polynomial to binominal". Nothing worked.  n the last case I got a messsage that there is no obvious mistake, but still the program does not give me any results. There were some posts here related to the topic, but there is still no correct answer. I would appreciate any help. Here is my original xml, It works for LibSVM in multi-class case:

 

 

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.013">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="text:process_document_from_file" compatibility="5.3.002" expanded="true" height="76" name="Process Categories" width="90" x="112" y="75">
        <list key="text_directories">
          <parameter key="En1" value="D:\Categories\En"/>
        </list>
        <parameter key="keep_text" value="true"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="5.3.002" expanded="true" name="Tokenize"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="5.3.002" expanded="true" name="Filter Stopwords (English)"/>
          <operator activated="true" class="text:stem_porter" compatibility="5.3.002" expanded="true" name="Stem (Porter)"/>
          <operator activated="true" class="text:generate_n_grams_terms" compatibility="5.3.002" expanded="true" name="Generate n-Grams (Terms)">
            <parameter key="max_length" value="4"/>
          </operator>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
          <connect from_op="Stem (Porter)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="x_validation" compatibility="5.3.013" expanded="true" height="112" name="Validation" width="90" x="313" y="75">
        <process expanded="true">
          <operator activated="true" class="support_vector_machine_libsvm" compatibility="5.3.013" expanded="true" height="76" name="SVM" width="90" x="110" y="30">
            <parameter key="svm_type" value="one-class"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="training" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.3.013" expanded="true" height="76" name="Performance" width="90" x="198" y="30">
            <parameter key="classification_error" value="true"/>
            <parameter key="weighted_mean_recall" value="true"/>
            <parameter key="weighted_mean_precision" value="true"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Process Categories" from_port="example set" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_port="result 1"/>
      <connect from_op="Validation" from_port="training" to_port="result 2"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

 

Best Answer

  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,507 RM Data Scientist
    Solution Accepted

    Hi

     

    i've checked it again. it is the other way around. you need to have a nominal coloum with only one class as label. See attached process.

     

    ~Martin

     

    Spoiler
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <process version="7.1.001">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="7.1.001" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="retrieve" compatibility="7.1.001" expanded="true" height="68" name="Retrieve Golf" width="90" x="45" y="34">
    <parameter key="repository_entry" value="//Samples/data/Golf"/>
    </operator>
    <operator activated="true" class="nominal_to_numerical" compatibility="7.1.001" expanded="true" height="103" name="Nominal to Numerical" width="90" x="179" y="34">
    <list key="comparison_groups"/>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes" width="90" x="313" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Play"/>
    <parameter key="invert_selection" value="true"/>
    <parameter key="include_special_attributes" value="true"/>
    </operator>
    <operator activated="true" class="generate_attributes" compatibility="7.1.001" expanded="true" height="82" name="Generate Attributes" width="90" x="447" y="34">
    <list key="function_descriptions">
    <parameter key="label" value="&quot;a label&quot;"/>
    </list>
    </operator>
    <operator activated="true" class="set_role" compatibility="7.1.001" expanded="true" height="82" name="Set Role" width="90" x="581" y="34">
    <parameter key="attribute_name" value="label"/>
    <parameter key="target_role" value="label"/>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="support_vector_machine_libsvm" compatibility="7.1.001" expanded="true" height="82" name="SVM" width="90" x="782" y="34">
    <parameter key="svm_type" value="one-class"/>
    <list key="class_weights"/>
    </operator>
    <connect from_op="Retrieve Golf" from_port="output" to_op="Nominal to Numerical" to_port="example set input"/>
    <connect from_op="Nominal to Numerical" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="SVM" to_port="training set"/>
    <connect from_op="SVM" from_port="model" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    </process>
    </operator>
    </process>
    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany

Answers

  • Options
    bhupendra_patilbhupendra_patil Administrator, Employee, Member Posts: 168 RM Data Scientist

    hello @In777

     

    Not sure if I this solves your issue, but typical problems when doing type conversion and your column is  has a special role (e.g columns like labels , weights etc.) is folks do not check the "inclide special attributes" checkbox.

     

    Can you confirm if that was not the case

  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,507 RM Data Scientist

    Dear @In777,

     

    thanks for reaching out. The LibSVM One class svm is sometimes a bit confusing. You acutually need to have a binary label to do it. The second class needs to have 0 examples.

     

    ~Martin

    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
  • Options
    In777In777 Member Posts: 29 Contributor II

    I do not think that the problem is with the transformation. I modified my workflow, but it still does not work.  I got a message about bug report. Here is my xml:

    l version="1.0" encoding="UTF-8" standalone="no"?>
    <process version="7.1.001">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="text:process_document_from_file" compatibility="7.1.001" expanded="true" height="82" name="Process Categories" width="90" x="112" y="85">
    <list key="text_directories">
    <parameter key="Energy" value="D:\Categories\Energy"/>
    <parameter key="Education" value="D:\Categories\Education"/>
    </list>
    <parameter key="keep_text" value="true"/>
    <process expanded="true">
    <operator activated="true" class="text:tokenize" compatibility="7.1.001" expanded="true" height="68" name="Tokenize" width="90" x="45" y="34"/>
    <operator activated="true" class="text:filter_stopwords_english" compatibility="7.1.001" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="179" y="34"/>
    <operator activated="true" class="text:stem_porter" compatibility="7.1.001" expanded="true" height="68" name="Stem (Porter)" width="90" x="313" y="34"/>
    <operator activated="true" class="text:generate_n_grams_terms" compatibility="7.1.001" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="447" y="34">
    <parameter key="max_length" value="4"/>
    </operator>
    <connect from_port="document" to_op="Tokenize" to_port="document"/>
    <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
    <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
    <connect from_op="Stem (Porter)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
    <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
    </process>
    </operator>
    <operator activated="true" class="x_validation" compatibility="7.1.001" expanded="true" height="124" name="Validation" width="90" x="380" y="85">
    <parameter key="number_of_validations" value="3"/>
    <process expanded="true">
    <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples" width="90" x="45" y="34">
    <parameter key="parameter_string" value="label=Energy"/>
    <parameter key="condition_class" value="attribute_value_filter"/>
    <list key="filters_list"/>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes" width="90" x="180" y="30">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="label"/>
    <parameter key="invert_selection" value="true"/>
    <parameter key="include_special_attributes" value="true"/>
    </operator>
    <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="82" name="Generate Attributes" width="90" x="45" y="187">
    <list key="function_descriptions">
    <parameter key="label" value="&quot;Energy&quot;"/>
    </list>
    </operator>
    <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role" width="90" x="179" y="187">
    <parameter key="attribute_name" value="label"/>
    <parameter key="target_role" value="label"/>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="support_vector_machine_libsvm" compatibility="7.1.001" expanded="true" height="82" name="SVM" width="90" x="313" y="34">
    <parameter key="svm_type" value="one-class"/>
    <list key="class_weights"/>
    </operator>
    <connect from_port="training" to_op="Filter Examples" to_port="example set input"/>
    <connect from_op="Filter Examples" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="SVM" to_port="training set"/>
    <connect from_op="SVM" from_port="model" to_port="model"/>
    <portSpacing port="source_training" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
    </process>
    <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
    <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="nominal_to_binominal" compatibility="7.1.001" expanded="true" height="103" name="Nominal to Binominal" width="90" x="179" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="prediction(label)"/>
    <parameter key="include_special_attributes" value="true"/>
    </operator>
    <operator activated="true" class="performance" compatibility="7.1.001" expanded="true" height="82" name="Performance" width="90" x="313" y="34">
    <parameter key="use_example_weights" value="false"/>
    </operator>
    <connect from_port="model" to_op="Apply Model" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_op="Nominal to Binominal" to_port="example set input"/>
    <connect from_op="Nominal to Binominal" from_port="example set output" to_op="Performance" to_port="labelled data"/>
    <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="sink_averagable 1" spacing="0"/>
    <portSpacing port="sink_averagable 2" spacing="0"/>
    </process>
    </operator>
    <connect from_op="Process Categories" from_port="example set" to_op="Validation" to_port="training"/>
    <connect from_op="Validation" from_port="model" to_port="result 1"/>
    <connect from_op="Validation" from_port="training" to_port="result 2"/>
    <connect from_op="Validation" from_port="averagable 1" to_port="result 3"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    <portSpacing port="sink_result 4" spacing="0"/>
    </process>
    </operator>
    </process>
  • Options
    In777In777 Member Posts: 29 Contributor II

    Thank you for your suggestion. I've tried to leave one of the directories with the examples empty, unfortunately that did not help. I still get a bug report (see my previous post).

Sign In or Register to comment.