Options

Information Extraction plugin help in RapidMiner

spykeburn Member Posts: 4 Contributor I
edited November 2018 in Help
I'm trying to apply text mining to a text document with the help of the Information Extraction plugin. I don't know which operators to use or how to use them. Please help: how do I proceed further?




<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.015">
  <!-- Process-level context: no inputs, outputs, or macros are declared. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <!-- Root operator. Its inner process wires four operators in sequence:
       Read Document, Process Documents, SentenceTokenizer, TextAnnotator. -->
  <operator activated="true"
            class="process"
            compatibility="5.3.015"
            expanded="true"
            name="Process">
    <process expanded="true">
      <!-- Step 1: document reader configured with an absolute local file path. -->
      <operator activated="true"
                class="text:read_document"
                compatibility="5.3.002"
                expanded="true"
                height="60"
                name="Read Document"
                width="90"
                x="45"
                y="30">
        <parameter key="file" value="C:\Users\Shireen\Desktop\times manifesto.txt"/>
      </operator>
      <!-- Step 2: Process Documents. The inner process contains no operators;
           it only connects the incoming "document" port to "document 1". -->
      <operator activated="true"
                class="text:process_documents"
                compatibility="5.3.002"
                expanded="true"
                height="94"
                name="Process Documents"
                width="90"
                x="179"
                y="75">
        <process expanded="true">
          <connect from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Step 3: sentence tokenizer from the information_extraction plugin.
           NOTE(review): optionalAttribute="and" looks like a placeholder value;
           confirm it names an attribute that exists in the incoming example set. -->
      <operator activated="true"
                class="information_extraction:sentence_tokenizer"
                compatibility="1.0.000"
                expanded="true"
                height="76"
                name="SentenceTokenizer"
                width="90"
                x="380"
                y="75">
        <parameter key="optionalAttribute" value="and"/>
      </operator>
      <!-- Step 4: text annotator from the information_extraction plugin.
           NOTE(review): the values "and", "name" and "no" look like placeholders;
           confirm the repository entry and both attribute names before running. -->
      <operator activated="true"
                class="information_extraction:text_annotator"
                compatibility="1.0.000"
                expanded="true"
                height="76"
                name="TextAnnotator"
                width="90"
                x="514"
                y="120">
        <parameter key="repository-entry" value="and"/>
        <parameter key="text-attribute" value="name"/>
        <parameter key="label-attribute" value="no"/>
      </operator>
      <!-- Wiring: each operator's output feeds the next operator's input;
           the annotator's example set is delivered to the process result port. -->
      <connect from_op="Read Document"
               from_port="output"
               to_op="Process Documents"
               to_port="documents 1"/>
      <connect from_op="Process Documents"
               from_port="example set"
               to_op="SentenceTokenizer"
               to_port="example set input"/>
      <connect from_op="SentenceTokenizer"
               from_port="example set output"
               to_op="TextAnnotator"
               to_port="example set input"/>
      <connect from_op="TextAnnotator"
               from_port="example set output"
               to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Sign In or Register to comment.