Text Mining Use Cases and Capabilities with RapidMiner

yyhuang · April 2018

Screen Shot 2018-04-12 at 9.16.24 AM.png

Attached is the slide deck that summarizes the major functions and techniques offered by the Text Processing, Web Mining, and Operator Toolbox extensions.

Telcontar120 · April 2018

@yyhuang looks like some great new applications here. The presentation contains screenshots that appear to be of annotated processes---are there any new templates or examples that correspond to some of the newer capabilities described in the deck available for download?

Thomas_Ott · April 2018

Thanks @yyhuang, this has inspired me to upgrade my Twitter Content models.

yyhuang · April 2018

Thanks for your interest! Of course, I will share some template processes as supplemental files to the slides.

Telcontar120 · April 2018

@yyhuang any ETA on those template processes? :-)

Thanks!

yyhuang · April 2018

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
  <!--
    LSA / LSI example.
    Pipeline: retrieve the "Historical Sentiments" sample data, convert the
    "Text" attribute to text, turn each row into a document, preprocess the
    documents (tokenize, transform cases, English stopword filter, WordNet
    stemming) and reduce the resulting word vectors with SVD.

    Setup: the WordNet "dict" folder is read from the process macro
    wordnet_dict_dir declared in the context below. Its default is the
    original author's local path; change the macro value (Context panel in
    RapidMiner Studio) to your own WordNet installation before running.
  -->
  <context>
    <input/>
    <output/>
    <macros>
      <macro>
        <key>wordnet_dict_dir</key>
        <value>C:\Users\YuanyuanHuang\Documents\WordNet-3.0\dict</value>
      </macro>
    </macros>
  </context>
  <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- Input data: sample repository entry shipped with the Sentiment Analysis template. -->
      <operator activated="true" class="retrieve" compatibility="8.1.003" expanded="true" height="68" name="Retrieve Historical Sentiments" width="90" x="45" y="85">
        <parameter key="repository_entry" value="//Samples/Templates/Sentiment Analysis/Historical Sentiments"/>
      </operator>
      <!-- Only the single attribute "Text" is converted to the text type. -->
      <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true" height="82" name="Nominal to Text" width="90" x="179" y="85">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Text"/>
      </operator>
      <operator activated="true" class="text:data_to_documents" compatibility="8.1.000" expanded="true" height="68" name="Data to Documents" width="90" x="313" y="85">
        <list key="specify_weights"/>
      </operator>
      <!-- Vector creation is left at the operator default (the annotation below refers to tf-idf vectors). -->
      <operator activated="true" class="text:process_documents" compatibility="8.1.000" expanded="true" height="103" name="Process Documents" width="90" x="447" y="85">
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="112" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="313" y="34"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="514" y="34"/>
          <!-- Dictionary location comes from the wordnet_dict_dir context macro instead of a hard-coded path. -->
          <operator activated="true" class="wordnet:open_wordnet_dictionary" compatibility="5.3.000" expanded="true" height="68" name="Open WordNet Dictionary" width="90" x="514" y="136">
            <parameter key="directory" value="%{wordnet_dict_dir}"/>
          </operator>
          <operator activated="true" class="wordnet:stem_wordnet" compatibility="5.3.000" expanded="true" height="82" name="Stem (WordNet)" width="90" x="715" y="34"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (WordNet)" to_port="document"/>
          <connect from_op="Open WordNet Dictionary" from_port="dictionary" to_op="Stem (WordNet)" to_port="dictionary"/>
          <connect from_op="Stem (WordNet)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Dimensionality reduction: "keep percentage" mode with a threshold of 0.6. -->
      <operator activated="true" class="singular_value_decomposition" compatibility="8.1.003" expanded="true" height="103" name="SVD" width="90" x="581" y="85">
        <parameter key="dimensionality_reduction" value="keep percentage"/>
        <parameter key="percentage_threshold" value="0.6"/>
      </operator>
      <connect from_op="Retrieve Historical Sentiments" from_port="output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Data to Documents" to_port="example set"/>
      <connect from_op="Data to Documents" from_port="documents" to_op="Process Documents" to_port="documents 1"/>
      <connect from_op="Process Documents" from_port="example set" to_op="SVD" to_port="example set input"/>
      <!-- Results: 1 = reduced example set, 2 = original example set, 3 = SVD preprocessing model. -->
      <connect from_op="SVD" from_port="example set output" to_port="result 1"/>
      <connect from_op="SVD" from_port="original" to_port="result 2"/>
      <connect from_op="SVD" from_port="preprocessing model" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="42"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="21"/>
      <description align="center" color="yellow" colored="false" height="63" resized="true" width="455" x="263" y="10">LSA is quite simple, you just use SVD to perform dimensionality reduction on the tf-idf vectors&amp;#8211;that&amp;#8217;s really all there is to it!</description>
    </process>
  </operator>
</process>

LSA or LSI

yyhuang · April 2018

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
  <!--
    LDA topic-modelling example.
    Pipeline: retrieve the "Historical Sentiments" sample data, convert the
    "Text" attribute to text, turn each row into a document, preprocess every
    document inside Loop Collection, then fit LDA (Operator Toolbox) on the
    resulting document collection.

    Setup:
      * The WordNet "dict" folder is read from the process macro
        wordnet_dict_dir declared in the context below. Its default is the
        original author's local path; change the macro value to your own
        WordNet installation before running.
      * The repository entry "filter token" (relative path) must exist next to
        this process; it supplies the example set used by
        "Filter Tokens Using ExampleSet" (attribute "word").
  -->
  <context>
    <input/>
    <output/>
    <macros>
      <macro>
        <key>wordnet_dict_dir</key>
        <value>C:\Users\YuanyuanHuang\Documents\WordNet-3.0\dict</value>
      </macro>
    </macros>
  </context>
  <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- Input data: sample repository entry shipped with the Sentiment Analysis template. -->
      <operator activated="true" class="retrieve" compatibility="8.1.003" expanded="true" height="68" name="Retrieve Historical Sentiments" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/Templates/Sentiment Analysis/Historical Sentiments"/>
      </operator>
      <!-- Only the single attribute "Text" is converted to the text type. -->
      <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true" height="82" name="Nominal to Text" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Text"/>
      </operator>
      <operator activated="true" class="text:data_to_documents" compatibility="8.1.000" expanded="true" height="68" name="Data to Documents" width="90" x="313" y="34">
        <list key="specify_weights"/>
      </operator>
      <!-- Per-document preprocessing; the inner process runs once for each document of the collection. -->
      <operator activated="true" class="loop_collection" compatibility="8.1.003" expanded="true" height="82" name="Loop Collection" width="90" x="447" y="34">
        <process expanded="true">
          <!-- Example set of tokens handed to "Filter Tokens Using ExampleSet" (see the list posted with this process). -->
          <operator activated="true" class="retrieve" compatibility="8.1.003" expanded="true" height="68" name="Retrieve filter token" width="90" x="380" y="136">
            <parameter key="repository_entry" value="filter token"/>
          </operator>
          <!-- Splits on the explicitly listed characters: period, colon, comma and space. -->
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="45" y="34">
            <parameter key="mode" value="specify characters"/>
            <parameter key="characters" value=".:, "/>
          </operator>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="179" y="34"/>
          <!-- Dictionary location comes from the wordnet_dict_dir context macro instead of a hard-coded path. -->
          <operator activated="true" class="wordnet:open_wordnet_dictionary" compatibility="5.3.000" expanded="true" height="68" name="Open WordNet Dictionary" width="90" x="179" y="136">
            <parameter key="directory" value="%{wordnet_dict_dir}"/>
          </operator>
          <operator activated="true" class="wordnet:stem_wordnet" compatibility="5.3.000" expanded="true" height="82" name="Stem (WordNet)" width="90" x="380" y="34"/>
          <!-- Tokens are matched against the "word" attribute of the retrieved example set. -->
          <operator activated="true" class="operator_toolbox:filter_tokens_using_exampleset" compatibility="1.0.000" expanded="true" height="82" name="Filter Tokens Using ExampleSet" width="90" x="581" y="34">
            <parameter key="attribute" value="word"/>
          </operator>
          <!-- Deactivated and not connected; kept as an optional n-gram step. -->
          <operator activated="false" class="text:generate_n_grams_terms" compatibility="8.1.000" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="715" y="34">
            <parameter key="max_length" value="5"/>
            <description align="center" color="transparent" colored="false" width="126">generate n-grams with max length of 5</description>
          </operator>
          <connect from_port="single" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Retrieve filter token" from_port="output" to_op="Filter Tokens Using ExampleSet" to_port="example set"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Stem (WordNet)" to_port="document"/>
          <connect from_op="Open WordNet Dictionary" from_port="dictionary" to_op="Stem (WordNet)" to_port="dictionary"/>
          <connect from_op="Stem (WordNet)" from_port="document" to_op="Filter Tokens Using ExampleSet" to_port="document"/>
          <connect from_op="Filter Tokens Using ExampleSet" from_port="document" to_port="output 1"/>
          <portSpacing port="source_single" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- LDA settings: 5 topics, alpha heuristics off with alpha fixed at 50.0, 15 top words reported per topic. -->
      <operator activated="true" class="operator_toolbox:lda" compatibility="1.0.000" expanded="true" height="124" name="LDA" width="90" x="581" y="34">
        <parameter key="number_of_topics" value="5"/>
        <parameter key="use_alpha_heuristics" value="false"/>
        <parameter key="alpha" value="50.0"/>
        <parameter key="top_words_per_topic" value="15"/>
        <parameter key="include_meta_data" value="false"/>
      </operator>
      <connect from_op="Retrieve Historical Sentiments" from_port="output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Data to Documents" to_port="example set"/>
      <connect from_op="Data to Documents" from_port="documents" to_op="Loop Collection" to_port="collection"/>
      <connect from_op="Loop Collection" from_port="output 1" to_op="LDA" to_port="col"/>
      <!-- Results: 1 = LDA port "exa", 2 = LDA port "top". -->
      <connect from_op="LDA" from_port="exa" to_port="result 1"/>
      <connect from_op="LDA" from_port="top" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

LDA

The token filter ("Filter Tokens Using ExampleSet") is given a data set of words like the following:

and
the
you
for
in
on
from
of
am
is
was
are
be
i
that
with
very
really
can
has
will
this
they

yyhuang · April 2018

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
  <!--
    Entity (company name) detection and document similarity on BBC news feeds.
    Pipeline: read four BBC RSS feeds and append them, vectorize the items with
    "Process Documents from Data" using a word list retrieved from the
    repository entry "wordlist_result_entity_company_name", drop useless
    attributes, then feed two copies of the result into "post-processing":
      in 1 / out 1: builds a COMBINED attribute by looping over all other attributes
      in 2 / out 2: pairwise Jaccard similarity between examples, sorted decreasing
    The repository entry path is relative, so the word list must be stored next
    to this process.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- Word list wired to the "word list" port of Process Documents from Data (2). -->
      <operator activated="true" class="retrieve" compatibility="8.1.003" expanded="true" height="68" name="Retrieve wordlist_result_entity_company_name" width="90" x="45" y="85">
        <parameter key="repository_entry" value="wordlist_result_entity_company_name"/>
      </operator>
      <!-- Reads the feeds over HTTP, appends them, copies Title to Title2, makes Link and Title2 nominal, renames Id to id. -->
      <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true" height="82" name="Get News Feeds" width="90" x="45" y="187">
        <process expanded="true">
          <operator activated="true" class="web:read_rss" compatibility="7.3.000" expanded="true" height="68" name="BBC Top Stories" width="90" x="45" y="34">
            <parameter key="url" value="http://feeds.bbci.co.uk/news/rss.xml"/>
          </operator>
          <operator activated="true" class="web:read_rss" compatibility="7.3.000" expanded="true" height="68" name="BBC Asia" width="90" x="45" y="85">
            <parameter key="url" value="http://feeds.bbci.co.uk/news/world/asia/rss.xml"/>
          </operator>
          <operator activated="true" class="web:read_rss" compatibility="7.3.000" expanded="true" height="68" name="BBC Business" width="90" x="45" y="136">
            <parameter key="url" value="http://feeds.bbci.co.uk/news/business/rss.xml"/>
          </operator>
          <operator activated="true" class="web:read_rss" compatibility="7.3.000" expanded="true" height="68" name="BBC Entertainment" width="90" x="45" y="187">
            <parameter key="url" value="http://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"/>
          </operator>
          <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="145" name="Append" width="90" x="179" y="34"/>
          <!-- Title2 is a copy of Title; Title itself is converted to text later (Nominal to Text (2)). -->
          <operator activated="true" class="generate_copy" compatibility="8.1.003" expanded="true" height="82" name="Generate Copy" width="90" x="313" y="34">
            <parameter key="attribute_name" value="Title"/>
            <parameter key="new_name" value="Title2"/>
          </operator>
          <operator activated="true" class="text_to_nominal" compatibility="8.1.003" expanded="true" height="82" name="Text to Nominal" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="Link|Title2"/>
            <description align="center" color="transparent" colored="false" width="126">Don't convert article link to document text.</description>
          </operator>
          <operator activated="true" class="rename" compatibility="8.1.003" expanded="true" height="82" name="Rename (2)" width="90" x="581" y="34">
            <parameter key="old_name" value="Id"/>
            <parameter key="new_name" value="id"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <connect from_op="BBC Top Stories" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="BBC Asia" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="BBC Business" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="BBC Entertainment" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Append" from_port="merged set" to_op="Generate Copy" to_port="example set input"/>
          <connect from_op="Generate Copy" from_port="example set output" to_op="Text to Nominal" to_port="example set input"/>
          <connect from_op="Text to Nominal" from_port="example set output" to_op="Rename (2)" to_port="example set input"/>
          <connect from_op="Rename (2)" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Content and Title become text attributes, i.e. the input for document processing. -->
      <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true" height="82" name="Nominal to Text (2)" width="90" x="179" y="187">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Content|Title"/>
      </operator>
      <!-- Assigns custom roles to the metadata attributes (Author, Link, Published, Title2, id). -->
      <!-- NOTE(review): Title2 is assigned twice, role "T2" in the main parameters and role "tittle2" in the additional list; which one takes effect is not visible here. TODO confirm and keep only one. -->
      <operator activated="true" class="set_role" compatibility="8.1.003" expanded="true" height="82" name="Set Role (3)" width="90" x="313" y="187">
        <parameter key="attribute_name" value="Title2"/>
        <parameter key="target_role" value="T2"/>
        <list key="set_additional_roles">
          <parameter key="Author" value="author"/>
          <parameter key="Link" value="link"/>
          <parameter key="Published" value="date"/>
          <parameter key="Title2" value="tittle2"/>
          <parameter key="id" value="id"/>
        </list>
      </operator>
      <!-- Vectorizes the text attributes as term occurrences; receives the retrieved word list on its "word list" port. -->
      <!-- NOTE(review): breakpoints="after" is set on this operator; presumably left in to inspect the word vector. Confirm it is intended in a shared template. -->
      <operator activated="true" breakpoints="after" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="581" y="85">
        <parameter key="vector_creation" value="Term Occurrences"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <!-- Deactivated and not connected. -->
          <operator activated="false" class="web:extract_html_text_content" compatibility="7.3.000" expanded="true" height="68" name="Extract Content (2)" width="90" x="45" y="187"/>
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="179" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (2)" width="90" x="313" y="34"/>
          <!-- n-grams up to length 3; the posted target list uses underscore-joined names such as 20th_century_fox. -->
          <operator activated="true" class="text:generate_n_grams_terms" compatibility="8.1.000" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="514" y="34">
            <parameter key="max_length" value="3"/>
          </operator>
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">We use a common process for both the training and testing set</description>
      </operator>
      <operator activated="true" class="remove_useless_attributes" compatibility="8.1.003" expanded="true" height="82" name="Remove Useless Attributes" width="90" x="916" y="85"/>
      <!-- Two copies of the cleaned data: output 1 feeds the COMBINED branch, output 2 the similarity branch. -->
      <operator activated="true" class="multiply" compatibility="8.1.003" expanded="true" height="103" name="Multiply" width="90" x="1050" y="85"/>
      <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true" height="103" name="post-processing" width="90" x="1184" y="85">
        <process expanded="true">
          <!-- Branch 1 (in 1 to out 1): start with an empty string attribute COMBINED. -->
          <operator activated="true" class="generate_attributes" compatibility="8.1.003" expanded="true" height="82" name="Generate Attributes (4)" width="90" x="45" y="34">
            <list key="function_descriptions">
              <parameter key="COMBINED" value="&quot;&quot;"/>
            </list>
          </operator>
          <!-- Iterates over every attribute except COMBINED (single attribute filter with inverted selection), reusing the result of the previous iteration. -->
          <operator activated="true" class="concurrency:loop_attributes" compatibility="8.1.003" expanded="true" height="82" name="Loop Attributes" width="90" x="246" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="COMBINED"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="reuse_results" value="true"/>
            <process expanded="true">
              <!-- Per attribute: derive new_col from the looped attribute (empty string unless the expression equals 1). -->
              <operator activated="true" class="generate_attributes" compatibility="8.1.003" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="246" y="34">
                <list key="function_descriptions">
                  <parameter key="new_col" value="if(eval(%{loop_attribute})==1, %{loop_attribute},&quot;&quot;)"/>
                </list>
              </operator>
              <!-- Inverted single selection: removes the original looped attribute. -->
              <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes (2)" width="90" x="380" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="%{loop_attribute}"/>
                <parameter key="invert_selection" value="true"/>
              </operator>
              <!-- Deactivated and not connected; still carries before/after breakpoints. -->
              <operator activated="false" breakpoints="before,after" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace" width="90" x="45" y="187">
                <parameter key="replace_what" value="[0-9]"/>
                <parameter key="replace_by" value="%{loop_attribute}"/>
                <description align="center" color="transparent" colored="false" width="126"/>
              </operator>
              <!-- Prepends new_col and a comma to COMBINED when new_col is not empty. -->
              <operator activated="true" class="generate_attributes" compatibility="8.1.003" expanded="true" height="82" name="Generate Attributes (3)" width="90" x="514" y="34">
                <list key="function_descriptions">
                  <parameter key="COMBINED" value="if(new_col==&quot;&quot;, COMBINED, concat(new_col,&quot;,&quot;,COMBINED))"/>
                </list>
              </operator>
              <!-- new_col takes over the name of the attribute that was removed above. -->
              <operator activated="true" class="rename" compatibility="8.1.003" expanded="true" height="82" name="Rename" width="90" x="648" y="34">
                <parameter key="old_name" value="new_col"/>
                <parameter key="new_name" value="%{loop_attribute}"/>
                <list key="rename_additional_attributes"/>
              </operator>
              <connect from_port="input 1" to_op="Generate Attributes (2)" to_port="example set input"/>
              <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
              <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Generate Attributes (3)" to_port="example set input"/>
              <connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Rename" to_port="example set input"/>
              <connect from_op="Rename" from_port="example set output" to_port="output 1"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="set_role" compatibility="8.1.003" expanded="true" height="82" name="Set Role (2)" width="90" x="514" y="34">
            <parameter key="attribute_name" value="COMBINED"/>
            <parameter key="target_role" value="entity"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Branch 2 (in 2 to out 2): similarity between examples. -->
          <!-- NOTE(review): author, pub_date and source_domain do not match any attribute name visible in this process (the feed attributes seen here are Author, Link, Published, ...); presumably carried over from another process. Verify. -->
          <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes (3)" width="90" x="45" y="187">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="author|pub_date|source_domain"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <operator activated="true" class="numerical_to_polynominal" compatibility="8.1.003" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="179" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="id"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Jaccard similarity, configured as a numerical measure. -->
          <operator activated="true" class="data_to_similarity" compatibility="8.1.003" expanded="true" height="82" name="Data to Similarity" width="90" x="313" y="187">
            <parameter key="measure_types" value="NumericalMeasures"/>
            <parameter key="numerical_measure" value="JaccardSimilarity"/>
          </operator>
          <operator activated="true" class="similarity_to_data" compatibility="8.1.003" expanded="true" height="82" name="Similarity to Data" width="90" x="447" y="187"/>
          <!-- Keeps only rows whose SIMILARITY value is present. -->
          <operator activated="true" class="filter_examples" compatibility="8.1.003" expanded="true" height="103" name="Filter Examples" width="90" x="581" y="187">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="SIMILARITY.is_not_missing."/>
            </list>
          </operator>
          <!-- Highest SIMILARITY first. -->
          <operator activated="true" class="sort" compatibility="8.1.003" expanded="true" height="82" name="Sort" width="90" x="715" y="187">
            <parameter key="attribute_name" value="SIMILARITY"/>
            <parameter key="sorting_direction" value="decreasing"/>
          </operator>
          <connect from_port="in 1" to_op="Generate Attributes (4)" to_port="example set input"/>
          <connect from_port="in 2" to_op="Select Attributes (3)" to_port="example set input"/>
          <connect from_op="Generate Attributes (4)" from_port="example set output" to_op="Loop Attributes" to_port="input 1"/>
          <connect from_op="Loop Attributes" from_port="output 1" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_port="out 1"/>
          <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Numerical to Polynominal" to_port="example set input"/>
          <connect from_op="Numerical to Polynominal" from_port="example set output" to_op="Data to Similarity" to_port="example set"/>
          <connect from_op="Data to Similarity" from_port="similarity" to_op="Similarity to Data" to_port="similarity"/>
          <connect from_op="Data to Similarity" from_port="example set" to_op="Similarity to Data" to_port="exampleSet"/>
          <connect from_op="Similarity to Data" from_port="exampleSet" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Sort" to_port="example set input"/>
          <connect from_op="Sort" from_port="example set output" to_port="out 2"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="source_in 3" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
          <portSpacing port="sink_out 3" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Retrieve wordlist_result_entity_company_name" from_port="output" to_op="Process Documents from Data (2)" to_port="word list"/>
      <connect from_op="Get News Feeds" from_port="out 1" to_op="Nominal to Text (2)" to_port="example set input"/>
      <connect from_op="Nominal to Text (2)" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
      <connect from_op="Set Role (3)" from_port="example set output" to_op="Process Documents from Data (2)" to_port="example set"/>
      <connect from_op="Process Documents from Data (2)" from_port="example set" to_op="Remove Useless Attributes" to_port="example set input"/>
      <connect from_op="Remove Useless Attributes" from_port="example set output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="post-processing" to_port="in 1"/>
      <connect from_op="Multiply" from_port="output 2" to_op="post-processing" to_port="in 2"/>
      <!-- Results: 1 = data with the COMBINED attribute (role "entity"), 2 = similarity table sorted by SIMILARITY. -->
      <connect from_op="post-processing" from_port="out 1" to_port="result 1"/>
      <connect from_op="post-processing" from_port="out 2" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="42"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="84"/>
      <description align="left" color="yellow" colored="false" height="261" resized="true" width="257" x="1290" y="10">Extracted compnay name and find co-existence&lt;br&gt;find similar documents with the metioned entity name</description>
    </process>
  </operator>
</process>

Entity recognition for the company names mentioned in news titles, with a list of targets like:

19_entertainment
20th_century_fox
23andme
27b/6
37signals
3com
3m
7-eleven
a&m_records
a&w_root_beer

Telcontar120 · April 2018

Wonderful, thanks so much for those!!

Howdy, Stranger!

Quick Links

Categories

Altair RapidMiner Community

GET HELP. LEARN BEST PRACTICES. NETWORK WITH YOUR PEERS.

Text Mining Use Cases and Capabilities with RapidMiner

Comments