🎉 🎉   RAPIDMINER 9.5 BETA IS OUT!!!   🎉 🎉

GRAB THE HOTTEST NEW BETA OF RAPIDMINER STUDIO, SERVER, AND RADOOP. LET US KNOW WHAT YOU THINK!

CLICK HERE TO DOWNLOAD

🦉 🎤   RapidMiner Wisdom 2020 - CALL FOR SPEAKERS   🦉 🎤

We are inviting all community members to submit proposals to speak at Wisdom 2020 in Boston.


Whether it's a cool RapidMiner trick or a use case implementation, we want to see what you have.
Form link is below and deadline for submissions is November 15. See you in Boston!

CLICK HERE TO GO TO ENTRY FORM

Problem with "right" Classification

platanas20platanas20 Member Posts: 22  Maven
Hello again,

I created a project which i use the operator "Process Documents From Files" and i added 2 classes(negative and positive).Each folder consists of 50 txt with comments.Also i use the operator"Read Document" which reads a negative comment.My final purpose is to classify this comment in the class of the negative comments.But the result isn't right and the negative comment seems to be positive.Plz take a look to the code and post any suggestion you may have!And something else..Is there any classifier which does this job better than Naive Bayes?

Thanks a lot!!
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.1.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.1.008" expanded="true" name="Process">
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true" height="494" width="681">
      <operator activated="true" class="text:read_document" compatibility="5.1.002" expanded="true" height="60" name="Read Document" width="90" x="45" y="210">
        <parameter key="file" value="H:\Epifaneia Ergasias\RapidMiner\negative_comment.txt"/>
        <parameter key="encoding" value="UTF-8"/>
      </operator>
      <operator activated="true" class="text:process_documents" compatibility="5.1.002" expanded="true" height="94" name="Process Documents" width="90" x="179" y="300">
        <parameter key="parallelize_vector_creation" value="true"/>
        <process expanded="true" height="519" width="561">
          <operator activated="true" class="text:tokenize" compatibility="5.1.002" expanded="true" height="60" name="Tokenize (2)" width="90" x="45" y="30"/>
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.1.002" expanded="true" height="60" name="Filter Stopwords (2)" width="90" x="246" y="30">
            <parameter key="file" value="H:\Epifaneia Ergasias\RapidMiner\STOPWORDS.txt"/>
            <parameter key="encoding" value="UTF-8"/>
          </operator>
          <operator activated="true" class="text:transform_cases" compatibility="5.1.002" expanded="true" height="60" name="Transform Cases" width="90" x="380" y="30"/>
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
          <connect from_op="Filter Stopwords (2)" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="text:process_document_from_file" compatibility="5.1.002" expanded="true" height="76" name="Process Documents from Files" width="90" x="112" y="75">
        <list key="text_directories">
          <parameter key="Positive" value="H:\Epifaneia Ergasias\RapidMiner\Positive"/>
          <parameter key="Negative" value="H:\Epifaneia Ergasias\RapidMiner\Negative"/>
        </list>
        <parameter key="encoding" value="UTF-8"/>
        <parameter key="parallelize_vector_creation" value="true"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="5.1.002" expanded="true" name="Tokenize"/>
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.1.002" expanded="true" name="Filter Stopwords (Dictionary)">
            <parameter key="file" value="H:/Epifaneia Ergasias/RapidMiner/STOPWORDS.txt"/>
            <parameter key="encoding" value="UTF-8"/>
          </operator>
          <operator activated="true" class="text:transform_cases" compatibility="5.1.002" expanded="true" name="Transform Cases (2)"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (Dictionary)" to_port="document"/>
          <connect from_op="Filter Stopwords (Dictionary)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="x_validation" compatibility="5.1.008" expanded="true" height="112" name="Validation" width="90" x="313" y="120">
        <parameter key="parallelize_training" value="true"/>
        <parameter key="parallelize_testing" value="true"/>
        <process expanded="true" height="428" width="370">
          <operator activated="true" class="naive_bayes" compatibility="5.1.008" expanded="true" height="76" name="Naive Bayes" width="90" x="90" y="57"/>
          <connect from_port="training" to_op="Naive Bayes" to_port="training set"/>
          <connect from_op="Naive Bayes" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <process expanded="true" height="428" width="370">
          <operator activated="true" class="apply_model" compatibility="5.1.008" expanded="true" height="76" name="Apply Model (2)" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performanceevaluator" compatibility="5.1.008" expanded="true" height="76" name="PerformanceEvaluator" width="90" x="212" y="30">
            <parameter key="accuracy" value="true"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (2)" from_port="labelled data" to_op="PerformanceEvaluator" to_port="labelled data"/>
          <connect from_op="PerformanceEvaluator" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="apply_model" compatibility="5.1.008" expanded="true" height="76" name="Apply Model" width="90" x="514" y="300">
        <list key="application_parameters"/>
      </operator>
      <connect from_port="input 1" to_op="Process Documents from Files" to_port="word list"/>
      <connect from_op="Read Document" from_port="output" to_op="Process Documents" to_port="documents 1"/>
      <connect from_op="Process Documents" from_port="example set" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Process Documents from Files" from_port="example set" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_op="Apply Model" to_port="model"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 1"/>
      <connect from_op="Apply Model" from_port="labelled data" to_port="result 2"/>
      <connect from_op="Apply Model" from_port="model" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="source_input 2" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

Answers

  • colocolo Member Posts: 236  Guru
    Hi platanas,

    did you already try a SVM classifier? As far as I know, they are not bad when classifying documents. But I'm not really familiar with classification tasks, I am recently working in the information extraction sector.

    RapidMiner makes it pretty easy to test another method, so perhaps just give it a try?

    Regards
    Matthias
Sign In or Register to comment.