Options

[SOLVED] input example set has no attribute

lavramulavramu Member Posts: 16 Contributor II
edited November 2018 in Help
Hi,

I am trying to classify some documents using Naive Bayes, first training the algorithm and then testing it. I get an error which says: "Input example set has no attributes". "Learning schemes cannot be applied without at least one attribute", and the offending operator is Naive Bayes (Kernel).

I also see that my example set has the text and the metadata as attributes, but after executing the Process Documents operator the example set shows 5 special attributes and 0 regular attributes. I have no idea why.

I am not able to debug this. I don't know why I keep getting stuck like this.

This is my xml

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process export (process version 5.3.013).
     Pipeline, in connection order: Process Documents from Files, Set Role,
     Process Documents from Data, Validation (Naive Bayes (Kernel) for training;
     Apply Model and Performance for testing). -->
<process version="5.3.013">
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
   <process expanded="true">
     <!-- Reads documents from six directories; each list key (low, high, verylow,
          veryhigh, mediumhigh, mediumlow) names the directory mapped to it.
          Word-vector creation is off and the text is kept. The inner process
          passes each document through unchanged. -->
     <operator activated="true" class="text:process_document_from_file" compatibility="5.3.001" expanded="true" height="76" name="Process Documents from Files" width="90" x="45" y="30">
       <list key="text_directories">
         <parameter key="low" value="C:\Users\Uma\Desktop\nvivo\Low"/>
         <parameter key="high" value="C:\Users\Uma\Desktop\nvivo\High"/>
         <parameter key="verylow" value="C:\Users\Uma\Desktop\nvivo\VeryLow"/>
         <parameter key="veryhigh" value="C:\Users\Uma\Desktop\nvivo\VeryHigh"/>
         <parameter key="mediumhigh" value="C:\Users\Uma\Desktop\nvivo\MediumHigh"/>
         <parameter key="mediumlow" value="C:\Users\Uma\Desktop\nvivo\MediumLow"/>
       </list>
       <parameter key="use_file_extension_as_type" value="false"/>
       <parameter key="create_word_vector" value="false"/>
       <parameter key="keep_text" value="true"/>
       <process expanded="true">
         <connect from_port="document" to_port="document 1"/>
         <portSpacing port="source_document" spacing="0"/>
         <portSpacing port="sink_document 1" spacing="0"/>
         <portSpacing port="sink_document 2" spacing="0"/>
       </process>
     </operator>
     <!-- Targets the "text" attribute and lists it with role "regular".
          NOTE(review): no attribute is assigned the label role anywhere in this
          process; confirm whether the file loader supplies a label. -->
     <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="76" name="Set Role" width="90" x="179" y="30">
       <parameter key="attribute_name" value="text"/>
       <list key="set_additional_roles">
         <parameter key="text" value="regular"/>
       </list>
     </operator>
     <!-- Second text-processing stage, applied to the "text" attribute (weight 1.0).
          NOTE(review): create_word_vector is "false" here as well; presumably no
          token attributes are generated, which would be consistent with a learner
          reporting that the example set has no attributes. TODO confirm. -->
     <operator activated="true" class="text:process_document_from_data" compatibility="5.3.001" expanded="true" height="76" name="Process Documents from Data" width="90" x="313" y="30">
       <parameter key="create_word_vector" value="false"/>
       <parameter key="keep_text" value="true"/>
       <parameter key="select_attributes_and_weights" value="true"/>
       <list key="specify_weights">
         <parameter key="text" value="1.0"/>
       </list>
       <!-- Inner chain, in connection order: Transform Cases, Replace Tokens,
            Tokenize, Filter Stopwords (English), Stem (Porter). -->
       <process expanded="true">
         <operator activated="true" class="text:transform_cases" compatibility="5.3.001" expanded="true" height="60" name="Transform Cases" width="90" x="45" y="30"/>
         <!-- Each dictionary key below is replaced by a single space. -->
         <operator activated="true" class="text:replace_tokens" compatibility="5.3.001" expanded="true" height="60" name="Replace Tokens" width="90" x="179" y="30">
           <list key="replace_dictionary">
             <parameter key="reference.*coverage" value=" "/>
             <parameter key="&lt;internals.*]" value=" "/>
             <parameter key="&lt;page&gt;" value=" "/>
           </list>
         </operator>
         <!-- Tokenization mode is "linguistic sentences", not the operator default. -->
         <operator activated="true" class="text:tokenize" compatibility="5.3.001" expanded="true" height="60" name="Tokenize" width="90" x="313" y="30">
           <parameter key="mode" value="linguistic sentences"/>
         </operator>
         <operator activated="true" class="text:filter_stopwords_english" compatibility="5.3.001" expanded="true" height="60" name="Filter Stopwords (English)" width="90" x="112" y="165"/>
         <operator activated="true" class="text:stem_porter" compatibility="5.3.001" expanded="true" height="60" name="Stem (Porter)" width="90" x="313" y="165"/>
         <connect from_port="document" to_op="Transform Cases" to_port="document"/>
         <connect from_op="Transform Cases" from_port="document" to_op="Replace Tokens" to_port="document"/>
         <connect from_op="Replace Tokens" from_port="document" to_op="Tokenize" to_port="document"/>
         <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
         <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
         <connect from_op="Stem (Porter)" from_port="document" to_port="document 1"/>
         <portSpacing port="source_document" spacing="0"/>
         <portSpacing port="sink_document 1" spacing="0"/>
         <portSpacing port="sink_document 2" spacing="0"/>
       </process>
     </operator>
     <!-- Cross-validation wrapper with no explicit parameters, so the operator's
          defaults apply. First subprocess trains the model; second subprocess
          applies it to the test set and measures classification performance. -->
     <operator activated="true" class="x_validation" compatibility="5.3.013" expanded="true" height="112" name="Validation" width="90" x="380" y="165">
       <process expanded="true">
         <operator activated="true" class="naive_bayes_kernel" compatibility="5.3.013" expanded="true" height="76" name="Naive Bayes (Kernel)" width="90" x="45" y="30"/>
         <connect from_port="training" to_op="Naive Bayes (Kernel)" to_port="training set"/>
         <connect from_op="Naive Bayes (Kernel)" from_port="model" to_port="model"/>
         <portSpacing port="source_training" spacing="0"/>
         <portSpacing port="sink_model" spacing="0"/>
         <portSpacing port="sink_through 1" spacing="0"/>
       </process>
       <process expanded="true">
         <operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" height="76" name="Apply Model" width="90" x="112" y="30">
           <list key="application_parameters"/>
         </operator>
         <operator activated="true" class="performance_classification" compatibility="5.3.013" expanded="true" height="76" name="Performance" width="90" x="45" y="165">
           <list key="class_weights"/>
         </operator>
         <connect from_port="model" to_op="Apply Model" to_port="model"/>
         <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
         <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
         <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
         <portSpacing port="source_model" spacing="0"/>
         <portSpacing port="source_test set" spacing="0"/>
         <portSpacing port="source_through 1" spacing="0"/>
         <portSpacing port="sink_averagable 1" spacing="0"/>
         <portSpacing port="sink_averagable 2" spacing="0"/>
       </process>
     </operator>
     <!-- Top-level wiring: the three stages feed Validation; its training output,
          model and first performance vector go to results 1, 2 and 3. -->
     <connect from_op="Process Documents from Files" from_port="example set" to_op="Set Role" to_port="example set input"/>
     <connect from_op="Set Role" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
     <connect from_op="Process Documents from Data" from_port="example set" to_op="Validation" to_port="training"/>
     <connect from_op="Validation" from_port="model" to_port="result 2"/>
     <connect from_op="Validation" from_port="training" to_port="result 1"/>
     <connect from_op="Validation" from_port="averagable 1" to_port="result 3"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="0"/>
     <portSpacing port="sink_result 3" spacing="0"/>
     <portSpacing port="sink_result 4" spacing="0"/>
   </process>
 </operator>
</process>

Answers

  • Options
    MariusHelfMariusHelf RapidMiner Certified Expert, Member Posts: 1,869 Unicorn
    Hi,

    to make things short, you have to create the word vector in Process Documents - activate the respective parameter there.

    Additionally, you can totally remove the second Process Documents operator and move everything inside it into the first Process Documents operator.

    Furthermore you need to define the target variable, i.e. the label, of your data set. Use the Set Role operator for that.

    Best regards,
    Marius
  • Options
    lavramulavramu Member Posts: 16 Contributor II
    Thanks, but the problem is that I do not need a word vector. I tokenize based on linguistic sentences and not words.
    I think I found the problem...

    I was applying a 10-fold X-Validation on a dataset with very few examples... When I reduced the N it went through, but now I have a new problem. I will probably post that as a separate thread.
Sign In or Register to comment.