RapidMiner

‎04-12-2018 09:22 AM

Screen Shot 2018-04-12 at 9.16.24 AM.png

 

Attached is the slide deck that summarizes the major functions and techniques offered by the Text Processing, Web Mining, and Operator Toolbox extensions.

 

 

 

 

Comments
Unicorn
Unicorn

@yyhuang looks like some great new applications here.  The presentation contains screenshots that appear to be of annotated processes---are there any new templates or examples that correspond to some of the newer capabilities described in the deck available for download?

Unicorn
Unicorn

Thanks @yyhuang, this has inspired me to upgrade my Twitter Content models. 

RM Staff RM Staff
RM Staff

Thanks for your interest! Of course, I will share some template processes as supplemental files to the slides. 

Unicorn
Unicorn

@yyhuang any ETA on those template processes? :-)

Thanks!

RM Staff RM Staff
RM Staff
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
  <!-- LSA/LSI example: turns the Text attribute of the Historical Sentiments sample
       into a document vector table and reduces it with SVD. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the sample data set from the Samples repository. -->
      <operator activated="true" class="retrieve" compatibility="8.1.003" expanded="true" height="68" name="Retrieve Historical Sentiments" width="90" x="45" y="85">
        <parameter key="repository_entry" value="//Samples/Templates/Sentiment Analysis/Historical Sentiments"/>
      </operator>
      <!-- Convert only the attribute named Text from nominal to text type
           (presumably so that Data to Documents treats it as document content). -->
      <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true" height="82" name="Nominal to Text" width="90" x="179" y="85">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Text"/>
      </operator>
      <!-- Turn the example set into documents; no per-attribute weights are specified. -->
      <operator activated="true" class="text:data_to_documents" compatibility="8.1.000" expanded="true" height="68" name="Data to Documents" width="90" x="313" y="85">
        <list key="specify_weights"/>
      </operator>
      <!-- Per-document pipeline: Tokenize, Transform Cases, Filter Stopwords (English),
           Stem (WordNet). vector_creation is not set here, so the operator default applies;
           the annotation at the end of this process refers to tf-idf vectors. -->
      <operator activated="true" class="text:process_documents" compatibility="8.1.000" expanded="true" height="103" name="Process Documents" width="90" x="447" y="85">
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="112" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="313" y="34"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="514" y="34"/>
          <!-- NOTE(review): the directory below is an absolute Windows path from the author's
               machine; point it at your own WordNet dict folder before running. -->
          <operator activated="true" class="wordnet:open_wordnet_dictionary" compatibility="5.3.000" expanded="true" height="68" name="Open WordNet Dictionary" width="90" x="514" y="136">
            <parameter key="directory" value="C:\Users\YuanyuanHuang\Documents\WordNet-3.0\dict"/>
          </operator>
          <!-- Receives the filtered document and the dictionary opened above. -->
          <operator activated="true" class="wordnet:stem_wordnet" compatibility="5.3.000" expanded="true" height="82" name="Stem (WordNet)" width="90" x="715" y="34"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (WordNet)" to_port="document"/>
          <connect from_op="Open WordNet Dictionary" from_port="dictionary" to_op="Stem (WordNet)" to_port="dictionary"/>
          <connect from_op="Stem (WordNet)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Dimensionality reduction on the document vectors: "keep percentage" mode with a
           percentage threshold of 0.6. -->
      <operator activated="true" class="singular_value_decomposition" compatibility="8.1.003" expanded="true" height="103" name="SVD" width="90" x="581" y="85">
        <parameter key="dimensionality_reduction" value="keep percentage"/>
        <parameter key="percentage_threshold" value="0.6"/>
      </operator>
      <!-- Wiring: linear chain from Retrieve to SVD; all three SVD outputs (example set,
           original, preprocessing model) are delivered as process results 1 to 3. -->
      <connect from_op="Retrieve Historical Sentiments" from_port="output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Data to Documents" to_port="example set"/>
      <connect from_op="Data to Documents" from_port="documents" to_op="Process Documents" to_port="documents 1"/>
      <connect from_op="Process Documents" from_port="example set" to_op="SVD" to_port="example set input"/>
      <connect from_op="SVD" from_port="example set output" to_port="result 1"/>
      <connect from_op="SVD" from_port="original" to_port="result 2"/>
      <connect from_op="SVD" from_port="preprocessing model" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="42"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="21"/>
      <description align="center" color="yellow" colored="false" height="63" resized="true" width="455" x="263" y="10">LSA is quite simple, you just use SVD to perform dimensionality reduction on the tf-idf vectors&amp;#8211;that&amp;#8217;s really all there is to it!</description>
    </process>
  </operator>
</process>

LSA or LSI

RM Staff RM Staff
RM Staff
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
  <!-- LDA example: converts the Text attribute of the Historical Sentiments sample into
       documents, preprocesses each document inside a Loop Collection, and passes the
       resulting collection to the Operator Toolbox LDA operator. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the sample data set from the Samples repository. -->
      <operator activated="true" class="retrieve" compatibility="8.1.003" expanded="true" height="68" name="Retrieve Historical Sentiments" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/Templates/Sentiment Analysis/Historical Sentiments"/>
      </operator>
      <!-- Convert only the attribute named Text from nominal to text type. -->
      <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true" height="82" name="Nominal to Text" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Text"/>
      </operator>
      <!-- Turn the example set into documents; no per-attribute weights are specified. -->
      <operator activated="true" class="text:data_to_documents" compatibility="8.1.000" expanded="true" height="68" name="Data to Documents" width="90" x="313" y="34">
        <list key="specify_weights"/>
      </operator>
      <!-- Inner process runs on the "single" input port, i.e. one collection element at a
           time: Tokenize (2), Transform Cases, Stem (WordNet), Filter Tokens Using ExampleSet. -->
      <operator activated="true" class="loop_collection" compatibility="8.1.003" expanded="true" height="82" name="Loop Collection" width="90" x="447" y="34">
        <process expanded="true">
          <!-- NOTE(review): "filter token" is a relative repository entry that is not part of
               the Samples path used above; presumably it has to be created next to this
               process before running. It feeds the example set port of
               Filter Tokens Using ExampleSet, which reads its attribute named word. -->
          <operator activated="true" class="retrieve" compatibility="8.1.003" expanded="true" height="68" name="Retrieve filter token" width="90" x="380" y="136">
            <parameter key="repository_entry" value="filter token"/>
          </operator>
          <!-- Tokenize in "specify characters" mode; the split characters are
               period, colon, comma and space. -->
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="45" y="34">
            <parameter key="mode" value="specify characters"/>
            <parameter key="characters" value=".:, "/>
          </operator>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="179" y="34"/>
          <!-- NOTE(review): the directory below is an absolute Windows path from the author's
               machine; point it at your own WordNet dict folder before running. -->
          <operator activated="true" class="wordnet:open_wordnet_dictionary" compatibility="5.3.000" expanded="true" height="68" name="Open WordNet Dictionary" width="90" x="179" y="136">
            <parameter key="directory" value="C:\Users\YuanyuanHuang\Documents\WordNet-3.0\dict"/>
          </operator>
          <operator activated="true" class="wordnet:stem_wordnet" compatibility="5.3.000" expanded="true" height="82" name="Stem (WordNet)" width="90" x="380" y="34"/>
          <!-- Filters the stemmed tokens using the retrieved example set; the attribute
               parameter names the column (word) that is read from it. -->
          <operator activated="true" class="operator_toolbox:filter_tokens_using_exampleset" compatibility="1.0.000" expanded="true" height="82" name="Filter Tokens Using ExampleSet" width="90" x="581" y="34">
            <parameter key="attribute" value="word"/>
          </operator>
          <!-- Deactivated (activated="false") and not referenced by any connect element below. -->
          <operator activated="false" class="text:generate_n_grams_terms" compatibility="8.1.000" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="715" y="34">
            <parameter key="max_length" value="5"/>
            <description align="center" color="transparent" colored="false" width="126">generate n-grams with max length of 5</description>
          </operator>
          <connect from_port="single" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Retrieve filter token" from_port="output" to_op="Filter Tokens Using ExampleSet" to_port="example set"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Stem (WordNet)" to_port="document"/>
          <connect from_op="Open WordNet Dictionary" from_port="dictionary" to_op="Stem (WordNet)" to_port="dictionary"/>
          <connect from_op="Stem (WordNet)" from_port="document" to_op="Filter Tokens Using ExampleSet" to_port="document"/>
          <connect from_op="Filter Tokens Using ExampleSet" from_port="document" to_port="output 1"/>
          <portSpacing port="source_single" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Topic model settings: 5 topics, alpha heuristics switched off with alpha fixed at
           50.0, 15 top words per topic, meta data not included. -->
      <operator activated="true" class="operator_toolbox:lda" compatibility="1.0.000" expanded="true" height="124" name="LDA" width="90" x="581" y="34">
        <parameter key="number_of_topics" value="5"/>
        <parameter key="use_alpha_heuristics" value="false"/>
        <parameter key="alpha" value="50.0"/>
        <parameter key="top_words_per_topic" value="15"/>
        <parameter key="include_meta_data" value="false"/>
      </operator>
      <!-- Wiring: linear chain from Retrieve to LDA; the LDA ports "exa" and "top" are
           delivered as process results 1 and 2. -->
      <connect from_op="Retrieve Historical Sentiments" from_port="output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Data to Documents" to_port="example set"/>
      <connect from_op="Data to Documents" from_port="documents" to_op="Loop Collection" to_port="collection"/>
      <connect from_op="Loop Collection" from_port="output 1" to_op="LDA" to_port="col"/>
      <connect from_op="LDA" from_port="exa" to_port="result 1"/>
      <connect from_op="LDA" from_port="top" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

 LDA

 

The token filter used above ("filter token") is a data set with words like:

 

and
the
you
for
in
on
from
of
am
is
was
are
be
i
that
with
very
really
can
has
will
this
they
RM Staff RM Staff
RM Staff
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
  <!-- Company-name extraction on BBC RSS news: the feed text is vectorized against a
       supplied word list (term occurrences), then the post-processing subprocess
       (1) builds a COMBINED attribute from the per-term attributes and
       (2) computes pairwise Jaccard similarity between rows, sorted decreasingly. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- NOTE(review): relative repository entry; it must exist next to this process.
           It is wired into the "word list" port of Process Documents from Data (2). -->
      <operator activated="true" class="retrieve" compatibility="8.1.003" expanded="true" height="68" name="Retrieve wordlist_result_entity_company_name" width="90" x="45" y="85">
        <parameter key="repository_entry" value="wordlist_result_entity_company_name"/>
      </operator>
      <!-- Reads four BBC RSS feeds, appends them, copies Title to Title2, converts Link and
           Title2 to nominal, and renames Id to id. Requires network access to the feed URLs. -->
      <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true" height="82" name="Get News Feeds" width="90" x="45" y="187">
        <process expanded="true">
          <operator activated="true" class="web:read_rss" compatibility="7.3.000" expanded="true" height="68" name="BBC Top Stories" width="90" x="45" y="34">
            <parameter key="url" value="http://feeds.bbci.co.uk/news/rss.xml"/>
          </operator>
          <operator activated="true" class="web:read_rss" compatibility="7.3.000" expanded="true" height="68" name="BBC Asia" width="90" x="45" y="85">
            <parameter key="url" value="http://feeds.bbci.co.uk/news/world/asia/rss.xml"/>
          </operator>
          <operator activated="true" class="web:read_rss" compatibility="7.3.000" expanded="true" height="68" name="BBC Business" width="90" x="45" y="136">
            <parameter key="url" value="http://feeds.bbci.co.uk/news/business/rss.xml"/>
          </operator>
          <operator activated="true" class="web:read_rss" compatibility="7.3.000" expanded="true" height="68" name="BBC Entertainment" width="90" x="45" y="187">
            <parameter key="url" value="http://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"/>
          </operator>
          <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="145" name="Append" width="90" x="179" y="34"/>
          <!-- Keep a second copy of Title (as Title2) alongside the original. -->
          <operator activated="true" class="generate_copy" compatibility="8.1.003" expanded="true" height="82" name="Generate Copy" width="90" x="313" y="34">
            <parameter key="attribute_name" value="Title"/>
            <parameter key="new_name" value="Title2"/>
          </operator>
          <operator activated="true" class="text_to_nominal" compatibility="8.1.003" expanded="true" height="82" name="Text to Nominal" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="Link|Title2"/>
            <description align="center" color="transparent" colored="false" width="126">Don't convert article link to document text.</description>
          </operator>
          <operator activated="true" class="rename" compatibility="8.1.003" expanded="true" height="82" name="Rename (2)" width="90" x="581" y="34">
            <parameter key="old_name" value="Id"/>
            <parameter key="new_name" value="id"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <connect from_op="BBC Top Stories" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="BBC Asia" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="BBC Business" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="BBC Entertainment" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Append" from_port="merged set" to_op="Generate Copy" to_port="example set input"/>
          <connect from_op="Generate Copy" from_port="example set output" to_op="Text to Nominal" to_port="example set input"/>
          <connect from_op="Text to Nominal" from_port="example set output" to_op="Rename (2)" to_port="example set input"/>
          <connect from_op="Rename (2)" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Content and Title are converted to text type; these are the attributes that are
           presumably vectorized by Process Documents from Data (2). -->
      <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true" height="82" name="Nominal to Text (2)" width="90" x="179" y="187">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Content|Title"/>
      </operator>
      <!-- Assigns custom roles to Author, Link, Published, Title2 and id.
           NOTE(review): Title2 gets a role twice, T2 via target_role and tittle2 in
           set_additional_roles; which one wins is not visible here, and tittle2 looks like
           a typo for title2. Confirm before relying on the role name. -->
      <operator activated="true" class="set_role" compatibility="8.1.003" expanded="true" height="82" name="Set Role (3)" width="90" x="313" y="187">
        <parameter key="attribute_name" value="Title2"/>
        <parameter key="target_role" value="T2"/>
        <list key="set_additional_roles">
          <parameter key="Author" value="author"/>
          <parameter key="Link" value="link"/>
          <parameter key="Published" value="date"/>
          <parameter key="Title2" value="tittle2"/>
          <parameter key="id" value="id"/>
        </list>
      </operator>
      <!-- Vectorizes the text with vector_creation "Term Occurrences", using the retrieved
           word list. Inner pipeline: Tokenize (2), Transform Cases (2), Generate n-Grams (Terms)
           with max_length 3; Extract Content (2) is deactivated and unconnected.
           NOTE(review): breakpoints="after" is set on this operator, presumably left over
           from debugging. The description attached to this operator mentions training and
           testing sets, which do not appear in this process; it may be carried over from
           another process. -->
      <operator activated="true" breakpoints="after" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="581" y="85">
        <parameter key="vector_creation" value="Term Occurrences"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="false" class="web:extract_html_text_content" compatibility="7.3.000" expanded="true" height="68" name="Extract Content (2)" width="90" x="45" y="187"/>
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="179" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (2)" width="90" x="313" y="34"/>
          <operator activated="true" class="text:generate_n_grams_terms" compatibility="8.1.000" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="514" y="34">
            <parameter key="max_length" value="3"/>
          </operator>
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">We use a common process for both the training and testing set</description>
      </operator>
      <operator activated="true" class="remove_useless_attributes" compatibility="8.1.003" expanded="true" height="82" name="Remove Useless Attributes" width="90" x="916" y="85"/>
      <!-- Duplicates the example set so both post-processing inputs (in 1, in 2) receive it. -->
      <operator activated="true" class="multiply" compatibility="8.1.003" expanded="true" height="103" name="Multiply" width="90" x="1050" y="85"/>
      <!-- Two independent branches:
           in 1 to out 1: Generate Attributes (4), Loop Attributes, Set Role (2);
           in 2 to out 2: Select Attributes (3), Numerical to Polynominal, Data to Similarity,
           Similarity to Data, Filter Examples, Sort. -->
      <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true" height="103" name="post-processing" width="90" x="1184" y="85">
        <process expanded="true">
          <!-- Branch 1: start with an empty-string attribute COMBINED that the loop fills. -->
          <operator activated="true" class="generate_attributes" compatibility="8.1.003" expanded="true" height="82" name="Generate Attributes (4)" width="90" x="45" y="34">
            <list key="function_descriptions">
              <parameter key="COMBINED" value="&quot;&quot;"/>
            </list>
          </operator>
          <!-- Loops over every attribute except COMBINED (single attribute + invert_selection).
               Per iteration the inner process generates new_col from the expression below,
               removes the looped attribute, prepends a non-empty new_col to COMBINED
               (comma separated), and renames new_col to the looped attribute's name.
               reuse_results is true, presumably so each iteration works on the previous
               iteration's output and COMBINED accumulates. -->
          <operator activated="true" class="concurrency:loop_attributes" compatibility="8.1.003" expanded="true" height="82" name="Loop Attributes" width="90" x="246" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="COMBINED"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="reuse_results" value="true"/>
            <process expanded="true">
              <operator activated="true" class="generate_attributes" compatibility="8.1.003" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="246" y="34">
                <list key="function_descriptions">
                  <parameter key="new_col" value="if(eval(%{loop_attribute})==1, %{loop_attribute},&quot;&quot;)"/>
                </list>
              </operator>
              <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes (2)" width="90" x="380" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="%{loop_attribute}"/>
                <parameter key="invert_selection" value="true"/>
              </operator>
              <!-- Deactivated and unconnected; still carries before/after breakpoints. -->
              <operator activated="false" breakpoints="before,after" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace" width="90" x="45" y="187">
                <parameter key="replace_what" value="[0-9]"/>
                <parameter key="replace_by" value="%{loop_attribute}"/>
                <description align="center" color="transparent" colored="false" width="126"/>
              </operator>
              <operator activated="true" class="generate_attributes" compatibility="8.1.003" expanded="true" height="82" name="Generate Attributes (3)" width="90" x="514" y="34">
                <list key="function_descriptions">
                  <parameter key="COMBINED" value="if(new_col==&quot;&quot;, COMBINED, concat(new_col,&quot;,&quot;,COMBINED))"/>
                </list>
              </operator>
              <operator activated="true" class="rename" compatibility="8.1.003" expanded="true" height="82" name="Rename" width="90" x="648" y="34">
                <parameter key="old_name" value="new_col"/>
                <parameter key="new_name" value="%{loop_attribute}"/>
                <list key="rename_additional_attributes"/>
              </operator>
              <connect from_port="input 1" to_op="Generate Attributes (2)" to_port="example set input"/>
              <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
              <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Generate Attributes (3)" to_port="example set input"/>
              <connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Rename" to_port="example set input"/>
              <connect from_op="Rename" from_port="example set output" to_port="output 1"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- COMBINED gets the custom role "entity" before leaving through out 1. -->
          <operator activated="true" class="set_role" compatibility="8.1.003" expanded="true" height="82" name="Set Role (2)" width="90" x="514" y="34">
            <parameter key="attribute_name" value="COMBINED"/>
            <parameter key="target_role" value="entity"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Branch 2: removes author, pub_date and source_domain (invert_selection),
               special attributes included.
               NOTE(review): none of these names match the attribute names referenced in
               Set Role (3) (Author, Link, Published, Title2, id); verify they exist. -->
          <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes (3)" width="90" x="45" y="187">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="author|pub_date|source_domain"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <operator activated="true" class="numerical_to_polynominal" compatibility="8.1.003" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="179" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="id"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Pairwise similarity using the numerical measure JaccardSimilarity. -->
          <operator activated="true" class="data_to_similarity" compatibility="8.1.003" expanded="true" height="82" name="Data to Similarity" width="90" x="313" y="187">
            <parameter key="measure_types" value="NumericalMeasures"/>
            <parameter key="numerical_measure" value="JaccardSimilarity"/>
          </operator>
          <operator activated="true" class="similarity_to_data" compatibility="8.1.003" expanded="true" height="82" name="Similarity to Data" width="90" x="447" y="187"/>
          <!-- Keep only rows whose SIMILARITY value is not missing. -->
          <operator activated="true" class="filter_examples" compatibility="8.1.003" expanded="true" height="103" name="Filter Examples" width="90" x="581" y="187">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="SIMILARITY.is_not_missing."/>
            </list>
          </operator>
          <!-- Highest SIMILARITY first. -->
          <operator activated="true" class="sort" compatibility="8.1.003" expanded="true" height="82" name="Sort" width="90" x="715" y="187">
            <parameter key="attribute_name" value="SIMILARITY"/>
            <parameter key="sorting_direction" value="decreasing"/>
          </operator>
          <connect from_port="in 1" to_op="Generate Attributes (4)" to_port="example set input"/>
          <connect from_port="in 2" to_op="Select Attributes (3)" to_port="example set input"/>
          <connect from_op="Generate Attributes (4)" from_port="example set output" to_op="Loop Attributes" to_port="input 1"/>
          <connect from_op="Loop Attributes" from_port="output 1" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_port="out 1"/>
          <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Numerical to Polynominal" to_port="example set input"/>
          <connect from_op="Numerical to Polynominal" from_port="example set output" to_op="Data to Similarity" to_port="example set"/>
          <connect from_op="Data to Similarity" from_port="similarity" to_op="Similarity to Data" to_port="similarity"/>
          <connect from_op="Data to Similarity" from_port="example set" to_op="Similarity to Data" to_port="exampleSet"/>
          <connect from_op="Similarity to Data" from_port="exampleSet" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Sort" to_port="example set input"/>
          <connect from_op="Sort" from_port="example set output" to_port="out 2"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="source_in 3" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
          <portSpacing port="sink_out 3" spacing="0"/>
        </process>
      </operator>
      <!-- Wiring: the word list and the role-annotated feed data both enter
           Process Documents from Data (2); its example set goes through
           Remove Useless Attributes and Multiply into post-processing, whose two outputs
           become process results 1 and 2. -->
      <connect from_op="Retrieve wordlist_result_entity_company_name" from_port="output" to_op="Process Documents from Data (2)" to_port="word list"/>
      <connect from_op="Get News Feeds" from_port="out 1" to_op="Nominal to Text (2)" to_port="example set input"/>
      <connect from_op="Nominal to Text (2)" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
      <connect from_op="Set Role (3)" from_port="example set output" to_op="Process Documents from Data (2)" to_port="example set"/>
      <connect from_op="Process Documents from Data (2)" from_port="example set" to_op="Remove Useless Attributes" to_port="example set input"/>
      <connect from_op="Remove Useless Attributes" from_port="example set output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="post-processing" to_port="in 1"/>
      <connect from_op="Multiply" from_port="output 2" to_op="post-processing" to_port="in 2"/>
      <connect from_op="post-processing" from_port="out 1" to_port="result 1"/>
      <connect from_op="post-processing" from_port="out 2" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="42"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="84"/>
      <description align="left" color="yellow" colored="false" height="261" resized="true" width="257" x="1290" y="10">Extracted compnay name and find co-existence&lt;br&gt;find similar documents with the metioned entity name</description>
    </process>
  </operator>
</process>

Entity recognition for the company names mentioned in news titles, with a list of targets like:

19_entertainment
20th_century_fox
23andme
27b/6
37signals
3com
3m
7-eleven
a&m_records
a&w_root_beer

 

Unicorn
Unicorn

Wonderful, thanks so much for those!!