create FP-Growth graph

TobiasNehrigTobiasNehrig Member Posts: 41 Guru
edited December 2018 in Help

Hi Experts,

 

I have a question about creating a graph from the results of the FP-Growth operator without using the Create Association Rules operator. Is there a way to visualize the FP-Growth results in a graph?

<?xml version="1.0" encoding="UTF-8"?><process version="8.2.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="8.2.001" expanded="true" name="Process">
<process expanded="true">
<!-- Crawler subprocess: runs the nested "Crawler Spon" subprocess, extracts the HTML text
     content of every returned document, stores the result in the repository entry
     "../spon-seiten roh" and passes it on through "out 1". -->
<operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="82" name="Crawler" width="90" x="45" y="34">
<process expanded="true">
<!-- Crawler Spon: Crawl Web, Free Memory (8), Get Pages, Free Memory (7), chained in that order. -->
<operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="82" name="Crawler Spon" width="90" x="45" y="34">
<process expanded="true">
<!-- Crawl starts at http://www.spiegel.de. The two crawling rules are regular expressions:
     one for URLs to store, one for links to follow. Depth is capped at 10 and the page
     count at 2000. -->
<operator activated="true" class="web:crawl_web_modern" compatibility="7.3.000" expanded="true" height="68" name="Crawl Web" width="90" x="112" y="34">
<parameter key="url" value="http://www.spiegel.de"/>
<list key="crawling_rules">
<parameter key="store_with_matching_url" value=".+www.spiegel.+"/>
<parameter key="follow_link_with_matching_url" value=".+spiegel.+|.+de.+"/>
</list>
<parameter key="max_crawl_depth" value="10"/>
<parameter key="retrieve_as_html" value="true"/>
<parameter key="add_content_as_attribute" value="true"/>
<parameter key="max_pages" value="2000"/>
<parameter key="max_page_size" value="100000"/>
<parameter key="delay" value="100"/>
<parameter key="max_concurrent_connections" value="200"/>
<parameter key="max_connections_per_host" value="100"/>
<parameter key="user_agent" value="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0"/>
</operator>
<operator activated="true" class="free_memory" compatibility="8.2.001" expanded="true" height="82" name="Free Memory (8)" width="90" x="246" y="34"/>
<!-- NOTE(review): link_attribute is "Link" while page_attribute is "link"; confirm both
     names against the attributes that Crawl Web actually emits. -->
<operator activated="true" class="web:retrieve_webpages" compatibility="7.3.000" expanded="true" height="68" name="Get Pages" width="90" x="447" y="34">
<parameter key="link_attribute" value="Link"/>
<parameter key="page_attribute" value="link"/>
<parameter key="random_user_agent" value="true"/>
</operator>
<operator activated="true" class="free_memory" compatibility="8.2.001" expanded="true" height="82" name="Free Memory (7)" width="90" x="648" y="34"/>
<connect from_op="Crawl Web" from_port="example set" to_op="Free Memory (8)" to_port="through 1"/>
<connect from_op="Free Memory (8)" from_port="through 1" to_op="Get Pages" to_port="Example Set"/>
<connect from_op="Get Pages" from_port="Example Set" to_op="Free Memory (7)" to_port="through 1"/>
<connect from_op="Free Memory (7)" from_port="through 1" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<!-- Word vector creation is switched off and keep_text is on; the inner process only runs
     Extract Content followed by Free Memory (9) on each document. -->
<operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (3)" width="90" x="246" y="34">
<parameter key="create_word_vector" value="false"/>
<parameter key="keep_text" value="true"/>
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="web:extract_html_text_content" compatibility="7.3.000" expanded="true" height="68" name="Extract Content" width="90" x="179" y="34">
<parameter key="ignore_non_html_tags" value="false"/>
</operator>
<operator activated="true" class="free_memory" compatibility="8.2.001" expanded="true" height="82" name="Free Memory (9)" width="90" x="380" y="34"/>
<connect from_port="document" to_op="Extract Content" to_port="document"/>
<connect from_op="Extract Content" from_port="document" to_op="Free Memory (9)" to_port="through 1"/>
<connect from_op="Free Memory (9)" from_port="through 1" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<!-- Stores the extracted pages under a repository path relative to this process. -->
<operator activated="true" class="store" compatibility="8.2.001" expanded="true" height="68" name="Store" width="90" x="514" y="34">
<parameter key="repository_entry" value="../spon-seiten roh"/>
</operator>
<connect from_op="Crawler Spon" from_port="out 1" to_op="Process Documents from Data (3)" to_port="example set"/>
<connect from_op="Process Documents from Data (3)" from_port="example set" to_op="Store" to_port="input"/>
<connect from_op="Store" from_port="through" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<!-- Prepare Data subprocess: assigns roles, generates an id, reorders and selects the
     attributes text, Title and id, filters on Title.is_not_missing, and copies the result
     to three output ports. -->
<operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="124" name="Prepare Data" width="90" x="246" y="34">
<process expanded="true">
<!-- Names the attribute "text" (no target role is spelled out in this XML) and gives
     "Title" the label role. -->
<operator activated="true" class="set_role" compatibility="8.2.001" expanded="true" height="82" name="Set Role (2)" width="90" x="45" y="34">
<parameter key="attribute_name" value="text"/>
<list key="set_additional_roles">
<parameter key="Title" value="label"/>
</list>
</operator>
<operator activated="true" class="generate_id" compatibility="8.2.001" expanded="true" height="82" name="Generate ID" width="90" x="179" y="34"/>
<operator activated="true" class="order_attributes" compatibility="8.2.001" expanded="true" height="82" name="Reorder Attributes" width="90" x="380" y="34">
<parameter key="attribute_ordering" value="Title|text"/>
</operator>
<!-- Keeps only the listed subset of attributes. -->
<operator activated="true" class="select_attributes" compatibility="8.2.001" expanded="true" height="82" name="Select Attributes" width="90" x="581" y="34">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="text|Title|id"/>
</operator>
<!-- Single filter entry: Title.is_not_missing. -->
<operator activated="true" class="filter_examples" compatibility="8.2.001" expanded="true" height="103" name="Filter Examples" width="90" x="715" y="34">
<list key="filters_list">
<parameter key="filters_entry_key" value="Title.is_not_missing."/>
</list>
<parameter key="filters_logic_and" value="false"/>
<parameter key="filters_check_metadata" value="false"/>
</operator>
<!-- Three copies are wired to out 1, out 2 and out 3 of this subprocess. -->
<operator activated="true" class="multiply" compatibility="8.2.001" expanded="true" height="124" name="Multiply uncut" width="90" x="849" y="34"/>
<connect from_port="in 1" to_op="Set Role (2)" to_port="example set input"/>
<connect from_op="Set Role (2)" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
<connect from_op="Generate ID" from_port="example set output" to_op="Reorder Attributes" to_port="example set input"/>
<connect from_op="Reorder Attributes" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Multiply uncut" to_port="input"/>
<connect from_op="Multiply uncut" from_port="output 1" to_port="out 1"/>
<connect from_op="Multiply uncut" from_port="output 2" to_port="out 2"/>
<connect from_op="Multiply uncut" from_port="output 3" to_port="out 3"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
<portSpacing port="sink_out 3" spacing="0"/>
<portSpacing port="sink_out 4" spacing="0"/>
</process>
</operator>
<!-- fp Growth subprocess: tokenizes and normalizes the documents, then hands the result to
     the nested "Co-occurrence" subprocess, whose FP-Growth "frequent sets" output becomes
     "out 1" of this subprocess. -->
<operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="82" name="fp Growth" width="90" x="514" y="34">
<process expanded="true">
<!-- Pruning is percentual with bounds 30.0 and 100.0. The absolute bounds (20 and 2000)
     are also present; presumably they are unused while prune_method is "percentual".
     TODO confirm. -->
<operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="313" y="34">
<parameter key="prune_method" value="percentual"/>
<parameter key="prune_below_percent" value="30.0"/>
<parameter key="prune_above_percent" value="100.0"/>
<parameter key="prune_below_absolute" value="20"/>
<parameter key="prune_above_absolute" value="2000"/>
<list key="specify_weights"/>
<process expanded="true">
<!-- Token pipeline, in connection order: Tokenize Non-letters (4), Tokenize Linguistic (4),
     Filter Tokens (by Length), Filter Stopwords (4), Stem (Snowball), Transform Cases (4),
     Free Memory (4). -->
<operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize Non-letters (4)" width="90" x="45" y="34"/>
<operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize Linguistic (4)" width="90" x="179" y="34">
<parameter key="mode" value="linguistic sentences"/>
<parameter key="language" value="German"/>
</operator>
<operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="313" y="34">
<parameter key="min_chars" value="2"/>
</operator>
<operator activated="true" class="text:filter_stopwords_german" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (4)" width="90" x="447" y="34"/>
<operator activated="true" class="text:stem_snowball" compatibility="8.1.000" expanded="true" height="68" name="Stem (Snowball)" width="90" x="581" y="34">
<parameter key="language" value="German"/>
</operator>
<operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (4)" width="90" x="715" y="34"/>
<operator activated="true" class="free_memory" compatibility="8.2.001" expanded="true" height="82" name="Free Memory (4)" width="90" x="849" y="34"/>
<connect from_port="document" to_op="Tokenize Non-letters (4)" to_port="document"/>
<connect from_op="Tokenize Non-letters (4)" from_port="document" to_op="Tokenize Linguistic (4)" to_port="document"/>
<connect from_op="Tokenize Linguistic (4)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
<connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Filter Stopwords (4)" to_port="document"/>
<connect from_op="Filter Stopwords (4)" from_port="document" to_op="Stem (Snowball)" to_port="document"/>
<connect from_op="Stem (Snowball)" from_port="document" to_op="Transform Cases (4)" to_port="document"/>
<connect from_op="Transform Cases (4)" from_port="document" to_op="Free Memory (4)" to_port="through 1"/>
<connect from_op="Free Memory (4)" from_port="through 1" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<!-- Co-occurrence: Text to Nominal, then Numerical to Binominal, then FP-Growth. -->
<operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="82" name="Co-occurrence" width="90" x="514" y="34">
<process expanded="true">
<operator activated="true" class="text_to_nominal" compatibility="8.2.001" expanded="true" height="82" name="Text to Nominal" width="90" x="45" y="34"/>
<operator activated="true" class="numerical_to_binominal" compatibility="8.2.001" expanded="true" height="82" name="Numerical to Binominal" width="90" x="179" y="34"/>
<!-- min_support is 0.5 and find_min_number_of_itemsets is off; only the "frequent sets"
     port is connected below. -->
<operator activated="true" class="concurrency:fp_growth" compatibility="8.2.001" expanded="true" height="82" name="FP-Growth" width="90" x="380" y="34">
<parameter key="positive_value" value="true"/>
<parameter key="min_support" value="0.5"/>
<parameter key="min_frequency" value="2"/>
<parameter key="find_min_number_of_itemsets" value="false"/>
<enumeration key="must_contain_list"/>
</operator>
<connect from_port="in 1" to_op="Text to Nominal" to_port="example set input"/>
<connect from_op="Text to Nominal" from_port="example set output" to_op="Numerical to Binominal" to_port="example set input"/>
<connect from_op="Numerical to Binominal" from_port="example set output" to_op="FP-Growth" to_port="example set"/>
<connect from_op="FP-Growth" from_port="frequent sets" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<connect from_port="in 1" to_op="Process Documents from Data" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="example set" to_op="Co-occurrence" to_port="in 1"/>
<connect from_op="Co-occurrence" from_port="out 1" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<connect from_op="Crawler" from_port="out 1" to_op="Prepare Data" to_port="in 1"/>
<connect from_op="Prepare Data" from_port="out 1" to_op="fp Growth" to_port="in 1"/>
<connect from_op="fp Growth" from_port="out 1" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>

best regards

Tobias

Tagged:

Best Answer

  • TobiasNehrigTobiasNehrig Member Posts: 41 Guru
    Solution Accepted

    Hi,

    I've found a solution for creating a co-occurrence graph based on the approach of @bhupendra_patil. After writing the FP-Growth result to an XML file, I had to read the XML file twice and create a new ExampleSet. 

     

    Bildschirmfoto vom 2018-08-08 16-56-11.png

     

    <?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
    <process expanded="true">
    <!-- Reads the exported FP-Growth result file and yields one example per FrequentItemSet
         element. Two values are taken from the first NominalItem of each set: its name text
         (declared as text attribute "word") and its id attribute (declared as integer
         attribute "word_id"). -->
    <operator activated="true" class="advanced_file_connectors:read_xml" compatibility="8.1.000" expanded="true" height="68" name="Read XML Name Items" width="90" x="45" y="85">
    <parameter key="file" value="/home/knecht/output_fp-growth.ioo"/>
    <parameter key="xpath_for_examples" value="//object-stream/FrequentItemSets/FrequentItemSets/default/frequentSets/com.rapidminer.operator.learner.associations.FrequentItemSet"/>
    <enumeration key="xpaths_for_attributes">
    <parameter key="xpath_for_attribute" value="items[1]/com.rapidminer.extension.concurrency.operator.learner.associations.fpgrowth.NominalItem[1]/name[1]/text()"/>
    <parameter key="xpath_for_attribute" value="items[1]/com.rapidminer.extension.concurrency.operator.learner.associations.fpgrowth.NominalItem[1]/attribute::id"/>
    </enumeration>
    <list key="namespaces"/>
    <parameter key="use_default_namespace" value="false"/>
    <list key="annotations"/>
    <parameter key="locale" value="German"/>
    <list key="data_set_meta_data_information">
    <parameter key="0" value="word.true.text.attribute"/>
    <parameter key="1" value="word_id.true.integer.attribute"/>
    </list>
    </operator>
    <!-- Reads the same file again, but the XPath predicate count(items/*)=2 keeps only
         FrequentItemSet elements whose items element has exactly two children. For each pair
         it takes the reference attribute of the first and second NominalItem (declared as
         integers "word_id1" and "word_id2") and the frequency text (declared as real
         "frequency"). Presumably the reference values correspond to the id values of the
         single items, since they are used as join keys against word_id later. TODO confirm. -->
    <operator activated="true" class="advanced_file_connectors:read_xml" compatibility="9.0.000" expanded="true" height="68" name="Read XML Count Items" width="90" x="45" y="187">
    <parameter key="file" value="/home/knecht/output_fp-growth.ioo"/>
    <parameter key="xpath_for_examples" value="//object-stream/FrequentItemSets/FrequentItemSets/default/frequentSets/com.rapidminer.operator.learner.associations.FrequentItemSet[count(items/*)=2]"/>
    <enumeration key="xpaths_for_attributes">
    <parameter key="xpath_for_attribute" value="items[1]/com.rapidminer.extension.concurrency.operator.learner.associations.fpgrowth.NominalItem[1]/attribute::reference"/>
    <parameter key="xpath_for_attribute" value="items[1]/com.rapidminer.extension.concurrency.operator.learner.associations.fpgrowth.NominalItem[2]/attribute::reference"/>
    <parameter key="xpath_for_attribute" value="frequency[1]/text()"/>
    </enumeration>
    <list key="namespaces"/>
    <parameter key="use_default_namespace" value="false"/>
    <list key="annotations"/>
    <parameter key="locale" value="German"/>
    <list key="data_set_meta_data_information">
    <parameter key="0" value="word_id1.true.integer.attribute"/>
    <parameter key="1" value="word_id2.true.integer.attribute"/>
    <parameter key="2" value="frequency.true.real.attribute"/>
    </list>
    </operator>
    <!-- Lookup chain that resolves both ids of a pair to words: a Multiply operator followed
         by two right joins, each followed by a rename of the joined "word" attribute. -->
    <operator activated="true" class="multiply" compatibility="9.0.000" expanded="true" height="103" name="Multiply (2)" width="90" x="179" y="85"/>
    <!-- Right join on the key pair word_id / word_id1 (the id attribute is not used as key). -->
    <operator activated="true" class="concurrency:join" compatibility="9.0.000" expanded="true" height="82" name="Join" width="90" x="313" y="187">
    <parameter key="join_type" value="right"/>
    <parameter key="use_id_attribute_as_key" value="false"/>
    <list key="key_attributes">
    <parameter key="word_id" value="word_id1"/>
    </list>
    </operator>
    <!-- Renames "word" to "word1" so the second join can bring in another "word" attribute. -->
    <operator activated="true" class="rename" compatibility="9.0.000" expanded="true" height="82" name="Rename Word1" width="90" x="447" y="187">
    <parameter key="old_name" value="word"/>
    <parameter key="new_name" value="word1"/>
    <list key="rename_additional_attributes"/>
    </operator>
    <!-- Right join on the key pair word_id / word_id2. -->
    <operator activated="true" class="concurrency:join" compatibility="9.0.000" expanded="true" height="82" name="Join (2)" width="90" x="581" y="85">
    <parameter key="join_type" value="right"/>
    <parameter key="use_id_attribute_as_key" value="false"/>
    <list key="key_attributes">
    <parameter key="word_id" value="word_id2"/>
    </list>
    </operator>
    <!-- Renames the second "word" attribute to "word2". -->
    <operator activated="true" class="rename" compatibility="9.0.000" expanded="true" height="82" name="Rename Word2" width="90" x="715" y="85">
    <parameter key="old_name" value="word"/>
    <parameter key="new_name" value="word2"/>
    <list key="rename_additional_attributes"/>
    </operator>
    <!-- Execute R operator. The script builds an edge table (Item1, Item2, Frequency) from the
         input columns word1, word2 and frequency, keeps only the rows with Frequency == 851,
         lays them out as a ggraph network and writes the plot to
         //home//knecht//cooccurrence_graph.png. It returns the edge table and the plot object.
         Changes against the earlier version of this script: the 1600x900 size is now given to
         png(), because plot() on a ggplot object ignores width and height; the plot is drawn
         with print(); the deprecated data_frame() call and the redundant as.data.frame()
         conversions are replaced by a single base data.frame() call. -->
    <operator activated="true" class="r_scripting:execute_r" compatibility="8.1.000" expanded="true" height="82" name="Graph" width="90" x="916" y="85">
    <parameter key="script" value="library(dplyr)&#10;library(tidytext)&#10;library(widyr)&#10;library(ggplot2)&#10;library(igraph)&#10;library(ggraph)&#10;&#10;rm_main = function(data)&#10;{&#10;# Build the edge list with base data.frame(); data_frame() is deprecated.&#10;table &lt;- data.frame(Item1 = data$word1, Item2 = data$word2, Frequency = data$frequency, stringsAsFactors = FALSE)&#10;&#10;set.seed(2018)&#10;cooccurre_graph &lt;- table %&gt;%&#10;filter (Frequency==851)%&gt;%&#10;#filter (Frequency&gt;=500)%&gt;%&#10; graph_from_data_frame() %&gt;%&#10; ggraph(layout =&quot;lgl&quot;)+&#10; geom_edge_link()+&#10; geom_node_point(size = 3) +&#10; geom_node_text(aes(label=name), repel = TRUE, point.padding = unit(0.2, &quot;lines&quot;)) +&#10; theme_void()&#10;# The image size is a property of the png() device; plot() on a ggplot object ignores width and height.&#10;png(&quot;//home//knecht//cooccurrence_graph.png&quot;, width = 1600, height = 900)&#10;print(cooccurre_graph)&#10;dev.off()&#10;return(list(table, cooccurre_graph))&#10;}&#10;"/>
    </operator>
    <!-- Wiring: the name items are duplicated by Multiply (2). Copy 2 is the left input of
         Join, with the pair counts on the right. The renamed result is the right input of
         Join (2), with copy 1 on the left. The final table feeds input 1 of Graph. None of
         the connect elements here targets a result port. -->
    <connect from_op="Read XML Name Items" from_port="output" to_op="Multiply (2)" to_port="input"/>
    <connect from_op="Read XML Count Items" from_port="output" to_op="Join" to_port="right"/>
    <connect from_op="Multiply (2)" from_port="output 1" to_op="Join (2)" to_port="left"/>
    <connect from_op="Multiply (2)" from_port="output 2" to_op="Join" to_port="left"/>
    <connect from_op="Join" from_port="join" to_op="Rename Word1" to_port="example set input"/>
    <connect from_op="Rename Word1" from_port="example set output" to_op="Join (2)" to_port="right"/>
    <connect from_op="Join (2)" from_port="join" to_op="Rename Word2" to_port="example set input"/>
    <connect from_op="Rename Word2" from_port="example set output" to_op="Graph" to_port="input 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    </process>
    </operator>
    </process>

    Tobias

Answers

  • Telcontar120Telcontar120 Moderator, RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,635 Unicorn

    Not that I know of, but I would be interested if any other community members know a way to do this!

    Brian T.
    Lindon Ventures 
    Data Science Consulting from Certified RapidMiner Experts
  • TobiasNehrigTobiasNehrig Member Posts: 41 Guru

     Hi @Telcontar120,

    I found the post "Writing Association Rules to Exampleset or file" from @bhupendra_patil and tried to implement it in my process. But writing the FP-Growth result to an XML file nearly exhausts my RAM (32 GB) and creates an 8 GB file. The Read XML operator mentioned there then exhausts my RAM completely and the process terminates.

     

    <?xml version="1.0" encoding="UTF-8"?><process version="8.2.001">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="8.2.001" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="82" name="Crawler" width="90" x="45" y="34">
    <process expanded="true">
    <operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="82" name="Crawler Spon" width="90" x="45" y="34">
    <process expanded="true">
    <operator activated="true" class="web:crawl_web_modern" compatibility="7.3.000" expanded="true" height="68" name="Crawl Web" width="90" x="112" y="34">
    <parameter key="url" value="http://www.spiegel.de"/>
    <list key="crawling_rules">
    <parameter key="store_with_matching_url" value=".+www.spiegel.+"/>
    <parameter key="follow_link_with_matching_url" value=".+spiegel.+|.+de.+"/>
    </list>
    <parameter key="max_crawl_depth" value="10"/>
    <parameter key="retrieve_as_html" value="true"/>
    <parameter key="add_content_as_attribute" value="true"/>
    <parameter key="max_pages" value="2000"/>
    <parameter key="max_page_size" value="100000"/>
    <parameter key="delay" value="100"/>
    <parameter key="max_concurrent_connections" value="200"/>
    <parameter key="max_connections_per_host" value="100"/>
    <parameter key="user_agent" value="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0"/>
    </operator>
    <operator activated="true" class="free_memory" compatibility="8.2.001" expanded="true" height="82" name="Free Memory (8)" width="90" x="246" y="34"/>
    <operator activated="true" class="web:retrieve_webpages" compatibility="7.3.000" expanded="true" height="68" name="Get Pages" width="90" x="447" y="34">
    <parameter key="link_attribute" value="Link"/>
    <parameter key="page_attribute" value="link"/>
    <parameter key="random_user_agent" value="true"/>
    </operator>
    <operator activated="true" class="free_memory" compatibility="8.2.001" expanded="true" height="82" name="Free Memory (7)" width="90" x="648" y="34"/>
    <connect from_op="Crawl Web" from_port="example set" to_op="Free Memory (8)" to_port="through 1"/>
    <connect from_op="Free Memory (8)" from_port="through 1" to_op="Get Pages" to_port="Example Set"/>
    <connect from_op="Get Pages" from_port="Example Set" to_op="Free Memory (7)" to_port="through 1"/>
    <connect from_op="Free Memory (7)" from_port="through 1" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    </operator>
    <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (3)" width="90" x="246" y="34">
    <parameter key="create_word_vector" value="false"/>
    <parameter key="keep_text" value="true"/>
    <list key="specify_weights"/>
    <process expanded="true">
    <operator activated="true" class="web:extract_html_text_content" compatibility="7.3.000" expanded="true" height="68" name="Extract Content" width="90" x="179" y="34">
    <parameter key="ignore_non_html_tags" value="false"/>
    </operator>
    <operator activated="true" class="free_memory" compatibility="8.2.001" expanded="true" height="82" name="Free Memory (9)" width="90" x="380" y="34"/>
    <connect from_port="document" to_op="Extract Content" to_port="document"/>
    <connect from_op="Extract Content" from_port="document" to_op="Free Memory (9)" to_port="through 1"/>
    <connect from_op="Free Memory (9)" from_port="through 1" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
    </process>
    </operator>
    <connect from_op="Crawler Spon" from_port="out 1" to_op="Process Documents from Data (3)" to_port="example set"/>
    <connect from_op="Process Documents from Data (3)" from_port="example set" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    </operator>
    <operator activated="true" class="free_memory" compatibility="8.2.001" expanded="true" height="82" name="Free Memory" width="90" x="179" y="34"/>
    <operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="82" name="Prepare Data" width="90" x="313" y="34">
    <process expanded="true">
    <operator activated="true" class="set_role" compatibility="8.2.001" expanded="true" height="82" name="Set Role (2)" width="90" x="45" y="34">
    <parameter key="attribute_name" value="text"/>
    <list key="set_additional_roles">
    <parameter key="Title" value="label"/>
    </list>
    </operator>
    <operator activated="true" class="generate_id" compatibility="8.2.001" expanded="true" height="82" name="Generate ID" width="90" x="179" y="34"/>
    <operator activated="true" class="order_attributes" compatibility="8.2.001" expanded="true" height="82" name="Reorder Attributes" width="90" x="313" y="34">
    <parameter key="attribute_ordering" value="Title|text"/>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="8.2.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="34">
    <parameter key="attribute_filter_type" value="subset"/>
    <parameter key="attributes" value="text|Title|id"/>
    </operator>
    <operator activated="true" class="filter_examples" compatibility="8.2.001" expanded="true" height="103" name="Filter Examples" width="90" x="581" y="34">
    <list key="filters_list">
    <parameter key="filters_entry_key" value="Title.is_not_missing."/>
    </list>
    <parameter key="filters_logic_and" value="false"/>
    <parameter key="filters_check_metadata" value="false"/>
    </operator>
    <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="782" y="34">
    <parameter key="prune_method" value="percentual"/>
    <parameter key="prune_below_percent" value="10.0"/>
    <parameter key="prune_above_percent" value="100.0"/>
    <parameter key="prune_below_absolute" value="20"/>
    <parameter key="prune_above_absolute" value="2000"/>
    <list key="specify_weights"/>
    <process expanded="true">
    <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize Non-letters (4)" width="90" x="45" y="34"/>
    <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize Linguistic (4)" width="90" x="179" y="34">
    <parameter key="mode" value="linguistic sentences"/>
    <parameter key="language" value="German"/>
    </operator>
    <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="313" y="34">
    <parameter key="min_chars" value="2"/>
    </operator>
    <operator activated="false" class="text:filter_stopwords_german" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (4)" width="90" x="179" y="187"/>
    <operator activated="false" class="subprocess" compatibility="8.2.001" expanded="true" height="82" name="Adblocker" width="90" x="447" y="187">
    <process expanded="true">
    <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (2)" width="90" x="45" y="34">
    <parameter key="string" value="Bitte"/>
    <parameter key="invert condition" value="true"/>
    </operator>
    <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (5)" width="90" x="179" y="34">
    <parameter key="string" value="deaktivieren"/>
    <parameter key="invert condition" value="true"/>
    </operator>
    <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (11)" width="90" x="313" y="34">
    <parameter key="string" value="Ihren"/>
    <parameter key="invert condition" value="true"/>
    </operator>
    <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (12)" width="90" x="447" y="34">
    <parameter key="string" value="Adblocker"/>
    <parameter key="invert condition" value="true"/>
    </operator>
    <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (13)" width="90" x="581" y="34">
    <parameter key="string" value="warum"/>
    <parameter key="invert condition" value="true"/>
    </operator>
    <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (14)" width="90" x="715" y="34">
    <parameter key="string" value="sehe"/>
    <parameter key="invert condition" value="true"/>
    </operator>
    <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (15)" width="90" x="849" y="34">
    <parameter key="string" value="nicht"/>
    <parameter key="invert condition" value="true"/>
    </operator>
    <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (16)" width="90" x="45" y="136">
    <parameter key="string" value="mehr"/>
    <parameter key="invert condition" value="true"/>
    </operator>
    <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (9)" width="90" x="179" y="136">
    <parameter key="string" value="ausnahme"/>
    <parameter key="invert condition" value="true"/>
    </operator>
    <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (17)" width="90" x="313" y="136">
    <parameter key="string" value="Erweiterungen"/>
    <parameter key="invert condition" value="true"/>
    </operator>
    <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (18)" width="90" x="447" y="136">
    <parameter key="string" value="do-not-track-fuvktionen"/>
    <parameter key="invert condition" value="true"/>
    </operator>
    <!-- Twelve further content-based token filters, one per filter string. All of them set
         "invert condition" to true; presumably matching tokens are dropped (this relies on
         the operator's default match condition; confirm). The x/y values only place the
         operators on the canvas in rows of y=136, y=238 and y=340. -->
    <operator name="Filter Tokens (19)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="581" y="136" width="90" height="68">
      <parameter key="string" value="inkognito-modus"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <operator name="Filter Tokens (3)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="715" y="136" width="90" height="68">
      <parameter key="string" value="werbung"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <operator name="Filter Tokens (21)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="849" y="136" width="90" height="68">
      <parameter key="string" value="informationen"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <operator name="Filter Tokens (20)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="45" y="238" width="90" height="68">
      <parameter key="string" value="bedeutung"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <operator name="Filter Tokens (4)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="179" y="238" width="90" height="68">
      <parameter key="string" value="browser"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <operator name="Filter Tokens (8)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="313" y="238" width="90" height="68">
      <parameter key="string" value="redaktion"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <operator name="Filter Tokens (10)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="447" y="238" width="90" height="68">
      <parameter key="string" value="forum"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <operator name="Filter Tokens (by Content)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="581" y="238" width="90" height="68">
      <parameter key="string" value="spiegel"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <operator name="Filter Tokens (6)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="715" y="238" width="90" height="68">
      <parameter key="string" value="einstellungen"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <operator name="Filter Tokens (7)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="849" y="238" width="90" height="68">
      <parameter key="string" value="klicken"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <operator name="Filter Tokens (22)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="45" y="340" width="90" height="68">
      <parameter key="string" value="Sicherheit"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <operator name="Filter Tokens (23)" class="text:filter_tokens_by_content" activated="true"
              compatibility="8.1.000" expanded="true" x="849" y="340" width="90" height="68">
      <parameter key="string" value="netz"/>
      <parameter key="invert condition" value="true"/>
    </operator>
    <!-- Wiring of the token filters as one linear chain: the subprocess input "in 1" feeds
         Filter Tokens (2), each filter's "document" output feeds the next filter's "document"
         input in the order listed below, and Filter Tokens (23) delivers to "out 1".
         The portSpacing entries carry canvas layout values only. -->
    <connect from_port="in 1" to_op="Filter Tokens (2)" to_port="document"/>
    <connect from_op="Filter Tokens (2)" from_port="document" to_op="Filter Tokens (5)" to_port="document"/>
    <connect from_op="Filter Tokens (5)" from_port="document" to_op="Filter Tokens (11)" to_port="document"/>
    <connect from_op="Filter Tokens (11)" from_port="document" to_op="Filter Tokens (12)" to_port="document"/>
    <connect from_op="Filter Tokens (12)" from_port="document" to_op="Filter Tokens (13)" to_port="document"/>
    <connect from_op="Filter Tokens (13)" from_port="document" to_op="Filter Tokens (14)" to_port="document"/>
    <connect from_op="Filter Tokens (14)" from_port="document" to_op="Filter Tokens (15)" to_port="document"/>
    <connect from_op="Filter Tokens (15)" from_port="document" to_op="Filter Tokens (16)" to_port="document"/>
    <connect from_op="Filter Tokens (16)" from_port="document" to_op="Filter Tokens (9)" to_port="document"/>
    <connect from_op="Filter Tokens (9)" from_port="document" to_op="Filter Tokens (17)" to_port="document"/>
    <connect from_op="Filter Tokens (17)" from_port="document" to_op="Filter Tokens (18)" to_port="document"/>
    <connect from_op="Filter Tokens (18)" from_port="document" to_op="Filter Tokens (19)" to_port="document"/>
    <connect from_op="Filter Tokens (19)" from_port="document" to_op="Filter Tokens (3)" to_port="document"/>
    <connect from_op="Filter Tokens (3)" from_port="document" to_op="Filter Tokens (21)" to_port="document"/>
    <connect from_op="Filter Tokens (21)" from_port="document" to_op="Filter Tokens (20)" to_port="document"/>
    <connect from_op="Filter Tokens (20)" from_port="document" to_op="Filter Tokens (4)" to_port="document"/>
    <connect from_op="Filter Tokens (4)" from_port="document" to_op="Filter Tokens (8)" to_port="document"/>
    <connect from_op="Filter Tokens (8)" from_port="document" to_op="Filter Tokens (10)" to_port="document"/>
    <connect from_op="Filter Tokens (10)" from_port="document" to_op="Filter Tokens (by Content)" to_port="document"/>
    <connect from_op="Filter Tokens (by Content)" from_port="document" to_op="Filter Tokens (6)" to_port="document"/>
    <connect from_op="Filter Tokens (6)" from_port="document" to_op="Filter Tokens (7)" to_port="document"/>
    <connect from_op="Filter Tokens (7)" from_port="document" to_op="Filter Tokens (22)" to_port="document"/>
    <connect from_op="Filter Tokens (22)" from_port="document" to_op="Filter Tokens (23)" to_port="document"/>
    <connect from_op="Filter Tokens (23)" from_port="document" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="source_in 2" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    </operator>
    <!-- Snowball stemmer configured for German. It is switched off (activated="false"). -->
    <operator name="Stem (Snowball)" class="text:stem_snowball" activated="false"
              compatibility="8.1.000" expanded="true" x="581" y="187" width="90" height="68">
      <parameter key="language" value="German"/>
    </operator>
    <!-- Case transformation step without explicit parameters. It is switched off (activated="false"). -->
    <operator name="Transform Cases (4)" class="text:transform_cases" activated="false"
              compatibility="8.1.000" expanded="true" x="313" y="187" width="90" height="68"/>
    <!-- Active Free Memory operator; no parameters are set. -->
    <operator name="Free Memory (4)" class="free_memory" activated="true"
              compatibility="8.2.001" expanded="true" x="581" y="34" width="90" height="82"/>
    <!-- Document flow of this subprocess: the input "document" goes through
         Tokenize Non-letters (4), Tokenize Linguistic (4), Filter Tokens (by Length) and
         Free Memory (4), and leaves through "document 1". No connection listed here
         references Stem (Snowball) or Transform Cases (4). -->
    <connect from_port="document" to_op="Tokenize Non-letters (4)" to_port="document"/>
    <connect from_op="Tokenize Non-letters (4)" from_port="document" to_op="Tokenize Linguistic (4)" to_port="document"/>
    <connect from_op="Tokenize Linguistic (4)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
    <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Free Memory (4)" to_port="through 1"/>
    <connect from_op="Free Memory (4)" from_port="through 1" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
    </process>
    </operator>
    <!-- Example-set chain of this subprocess: "in 1" feeds Set Role (2), then Generate ID,
         Reorder Attributes, Select Attributes, Filter Examples and
         Process Documents from Data, whose "example set" output is delivered to "out 1". -->
    <connect from_port="in 1" to_op="Set Role (2)" to_port="example set input"/>
    <connect from_op="Set Role (2)" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
    <connect from_op="Generate ID" from_port="example set output" to_op="Reorder Attributes" to_port="example set input"/>
    <connect from_op="Reorder Attributes" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
    <connect from_op="Filter Examples" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
    <connect from_op="Process Documents from Data" from_port="example set" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="source_in 2" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    </operator>
    <!-- "fp Growth" subprocess: converts the incoming example set with Text to Nominal and
         Numerical to Binominal, runs FP-Growth on it and exposes three outputs:
         out 1 = FP-Growth's example set, out 2 and out 3 = two copies of the frequent sets
         produced by Multiply. -->
    <operator name="fp Growth" class="subprocess" activated="true"
              compatibility="8.2.001" expanded="true" x="447" y="34" width="90" height="124">
      <process expanded="true">
        <operator name="Text to Nominal" class="text_to_nominal" activated="true"
                  compatibility="8.2.001" expanded="true" x="112" y="34" width="90" height="82"/>
        <operator name="Numerical to Binominal" class="numerical_to_binominal" activated="true"
                  compatibility="8.2.001" expanded="true" x="246" y="34" width="90" height="82"/>
        <!-- Requirement mode is "frequency" with min_frequency 2; item sets are limited to
             between 2 and 3 items. -->
        <operator name="FP-Growth" class="concurrency:fp_growth" activated="true"
                  compatibility="8.2.001" expanded="true" x="380" y="34" width="90" height="82">
          <parameter key="positive_value" value="true"/>
          <parameter key="min_requirement" value="frequency"/>
          <parameter key="min_support" value="0.5"/>
          <parameter key="min_frequency" value="2"/>
          <parameter key="min_items_per_itemset" value="2"/>
          <parameter key="max_items_per_itemset" value="3"/>
          <parameter key="max_number_of_itemsets" value="100000000"/>
          <parameter key="find_min_number_of_itemsets" value="false"/>
          <enumeration key="must_contain_list"/>
        </operator>
        <operator name="Multiply" class="multiply" activated="true"
                  compatibility="8.2.001" expanded="true" x="581" y="85" width="90" height="103"/>
        <connect from_port="in 1" to_op="Text to Nominal" to_port="example set input"/>
        <connect from_op="Text to Nominal" from_port="example set output" to_op="Numerical to Binominal" to_port="example set input"/>
        <connect from_op="Numerical to Binominal" from_port="example set output" to_op="FP-Growth" to_port="example set"/>
        <connect from_op="FP-Growth" from_port="example set" to_port="out 1"/>
        <connect from_op="FP-Growth" from_port="frequent sets" to_op="Multiply" to_port="input"/>
        <connect from_op="Multiply" from_port="output 1" to_port="out 2"/>
        <connect from_op="Multiply" from_port="output 2" to_port="out 3"/>
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
        <portSpacing port="sink_out 3" spacing="0"/>
        <portSpacing port="sink_out 4" spacing="0"/>
      </process>
    </operator>
    <!-- Writes the object it receives to a fixed file path, with output type XML. -->
    <operator name="Write" class="legacy:write" activated="true"
              compatibility="8.2.001" expanded="true" x="514" y="187" width="90" height="68">
      <parameter key="object_file" value="/home/knecht/FP_Growth_FrequencyItemSets.ioo"/>
      <parameter key="output_type" value="XML"/>
    </operator>
    <!-- Association rule generation using the "conviction" criterion with a minimum
         criterion value of 0.68. It is switched off (activated="false"). -->
    <operator name="Create Association Rules" class="create_association_rules" activated="false"
              compatibility="8.2.001" expanded="true" x="715" y="85" width="90" height="82">
      <parameter key="criterion" value="conviction"/>
      <parameter key="min_criterion_value" value="0.68"/>
    </operator>
    <!-- Reads /home/knecht/FP_Growth_FrequencyItemSets.ioo (the path the Write operator of
         this process targets) and turns every serialised FrequentItemSet element into one example.
         The row XPath now includes the "frequentSets" container element between "default" and
         the FrequentItemSet elements; without that step the expression selects no rows.
         NOTE(review): the attribute XPaths Size, Support, Item1 and Item2 may not correspond
         to child elements of a serialised item set (candidates are frequency[1] and
         items[1]/...NominalItem[n]/name[1]); confirm against the written file. -->
    <operator activated="true" class="advanced_file_connectors:read_xml" compatibility="8.2.001" expanded="true" height="68" name="Read XML" width="90" x="648" y="187">
    <parameter key="file" value="/home/knecht/FP_Growth_FrequencyItemSets.ioo"/>
    <parameter key="xpath_for_examples" value="//object-stream/FrequentItemSets/FrequentItemSets/default/frequentSets/com.rapidminer.operator.learner.associations.FrequentItemSet"/>
    <enumeration key="xpaths_for_attributes">
    <parameter key="xpath_for_attribute" value="Size[1]/text()"/>
    <parameter key="xpath_for_attribute" value="Support[1]/text()"/>
    <parameter key="xpath_for_attribute" value="Item1[1]/text()"/>
    <parameter key="xpath_for_attribute" value="Item2[1]/text()"/>
    </enumeration>
    <list key="namespaces"/>
    <list key="annotations"/>
    <parameter key="locale" value="German (Germany)"/>
    <list key="data_set_meta_data_information"/>
    </operator>
    <!-- Top-level wiring: Crawler feeds Free Memory, then Prepare Data, then fp Growth.
         fp Growth "out 1" and "out 2" go to results 1 and 2; "out 3" feeds Write.
         Read XML has no incoming connection and delivers its output to result 4;
         result 3 is left unconnected. -->
    <connect from_op="Crawler" from_port="out 1" to_op="Free Memory" to_port="through 1"/>
    <connect from_op="Free Memory" from_port="through 1" to_op="Prepare Data" to_port="in 1"/>
    <connect from_op="Prepare Data" from_port="out 1" to_op="fp Growth" to_port="in 1"/>
    <connect from_op="fp Growth" from_port="out 1" to_port="result 1"/>
    <connect from_op="fp Growth" from_port="out 2" to_port="result 2"/>
    <connect from_op="fp Growth" from_port="out 3" to_op="Write" to_port="object"/>
    <connect from_op="Read XML" from_port="output" to_port="result 4"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    <portSpacing port="sink_result 4" spacing="0"/>
    <portSpacing port="sink_result 5" spacing="0"/>
    </process>
    </operator>
    </process>

     

  • JeffChowaniecJeffChowaniec Employee, Member Posts: 14 RM Data Scientist

    I'm curious as to which version of RM Studio you are using. Versions 8.1 and below have the old versions of FP-Growth and frequent item sets. You might have to update to 8.2 to get a performance bump.

  • TobiasNehrigTobiasNehrig Member Posts: 41 Guru

    Hi @JeffChowaniec,

    I'm using RapidMiner 8.2.001

     

  • JeffChowaniecJeffChowaniec Employee, Member Posts: 14 RM Data Scientist

    I tried running your process and found that the web crawl runs for 25+ minutes; I wasn't able to finish the process because I need my machine for some other tasks. I have a 32 GB machine and I could see it getting taxed pretty hard at some points. Have you tried it with a data set that is a fraction of what you are trying to query? The idea is to make sure that even a small data set will run without using up the available memory before we dedicate a run time of 1+ hours to this.

  • TobiasNehrigTobiasNehrig Member Posts: 41 Guru
    Hi @JeffChowaniec
    I haven't tried to crawl fewer pages because I crawled once and stored the result in the repository. This file is too huge to upload here. Instead, here is a repository file taken after the Numerical to Binominal operator, as input data for FP-Growth.
  • TobiasNehrigTobiasNehrig Member Posts: 41 Guru

    Hi,

    I think I've found my problem with the memory. I had to change the FP-Growth parameter max items per itemset from 0 to 2. Now I struggle with filling the ExampleSet from the XML file, as described in "Writing Association Rules to Exampleset or file". In that example, the data import wizard automatically fills in the "current value" column in Step 4. That doesn't happen in my approach, and I don't know why.

     

    <?xml version="1.0" encoding="UTF-8"?><process version="8.2.001">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="8.2.001" expanded="true" name="Process">
    <process expanded="true">
    <!-- Loads the prepared data set from the repository entry given below (relative path). -->
    <operator name="Retrieve 18-07-25 Prepared for FP-growth stop word" class="retrieve" activated="true"
              compatibility="8.2.001" expanded="true" x="179" y="34" width="90" height="68">
      <parameter key="repository_entry" value="../Data/18-07-25 Prepared for FP-growth stop word"/>
    </operator>
    <!-- "FP-Growth Sub" subprocess: passes the input through Free Memory (3), Text to Nominal
         and Numerical to Binominal, runs FP-Growth and hands three copies of the frequent
         sets (via Multiply) to out 1, out 2 and out 3. The contained Retrieve operator is
         switched off and not connected. -->
    <operator name="FP-Growth Sub" class="subprocess" activated="true"
              compatibility="8.2.001" expanded="true" x="514" y="34" width="90" height="124">
      <process expanded="true">
        <operator name="Retrieve 18-07-24-binominal for FP-Growth Stop word" class="retrieve" activated="false"
                  compatibility="8.2.001" expanded="true" x="246" y="187" width="90" height="68">
          <parameter key="repository_entry" value="../Data/18-07-24-binominal for FP-Growth Stop word"/>
        </operator>
        <operator name="Free Memory (3)" class="free_memory" activated="true"
                  compatibility="8.2.001" expanded="true" x="45" y="34" width="90" height="82"/>
        <operator name="Text to Nominal" class="text_to_nominal" activated="true"
                  compatibility="8.2.001" expanded="true" x="179" y="34" width="90" height="82"/>
        <operator name="Numerical to Binominal" class="numerical_to_binominal" activated="true"
                  compatibility="8.2.001" expanded="true" x="380" y="34" width="90" height="82"/>
        <!-- min_support 0.001, min_frequency 2, and at most 2 items per item set. -->
        <operator name="FP-Growth" class="concurrency:fp_growth" activated="true"
                  compatibility="8.2.001" expanded="true" x="514" y="34" width="90" height="82">
          <parameter key="positive_value" value="true"/>
          <parameter key="min_support" value="0.001"/>
          <parameter key="min_frequency" value="2"/>
          <parameter key="max_items_per_itemset" value="2"/>
          <parameter key="max_number_of_itemsets" value="100000000"/>
          <parameter key="find_min_number_of_itemsets" value="false"/>
          <enumeration key="must_contain_list"/>
        </operator>
        <operator name="Multiply" class="multiply" activated="true"
                  compatibility="8.2.001" expanded="true" x="715" y="34" width="90" height="124"/>
        <connect from_port="in 1" to_op="Free Memory (3)" to_port="through 1"/>
        <connect from_op="Free Memory (3)" from_port="through 1" to_op="Text to Nominal" to_port="example set input"/>
        <connect from_op="Text to Nominal" from_port="example set output" to_op="Numerical to Binominal" to_port="example set input"/>
        <connect from_op="Numerical to Binominal" from_port="example set output" to_op="FP-Growth" to_port="example set"/>
        <connect from_op="FP-Growth" from_port="frequent sets" to_op="Multiply" to_port="input"/>
        <connect from_op="Multiply" from_port="output 1" to_port="out 1"/>
        <connect from_op="Multiply" from_port="output 2" to_port="out 2"/>
        <connect from_op="Multiply" from_port="output 3" to_port="out 3"/>
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
        <portSpacing port="sink_out 3" spacing="0"/>
        <portSpacing port="sink_out 4" spacing="0"/>
      </process>
    </operator>
    <!-- Create Graph: writes the incoming frequent item sets to an XML file, reads that file
         back as an ExampleSet and renames the XPath-named columns to Item1, Item2, Size and
         Support. The renamed ExampleSet is delivered to "out 1". -->
    <operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="82" name="Create Graph" width="90" x="782" y="187">
    <process expanded="true">
    <!-- Write is listed ahead of Read XML: the two operators share no connection, so the
         listing order is what lets the file be written before it is read. With Read XML
         listed first, the file could be missing or left over from an earlier run. -->
    <operator activated="true" class="legacy:write" compatibility="8.2.001" expanded="true" height="68" name="Write" width="90" x="45" y="34">
    <parameter key="object_file" value="/home/knecht/FP_Growth_FrequencyItemSets.ioo"/>
    <parameter key="output_type" value="XML"/>
    </operator>
    <!-- Reads the file written above; one example per serialised FrequentItemSet element.
         In the meta data, the frequency of NominalItem[2] is typed integer, matching the
         frequency of NominalItem[1] (it was polynominal before). -->
    <operator activated="true" class="advanced_file_connectors:read_xml" compatibility="8.2.001" expanded="true" height="68" name="Read XML" width="90" x="246" y="34">
    <parameter key="file" value="/home/knecht/FP_Growth_FrequencyItemSets.ioo"/>
    <parameter key="xpath_for_examples" value="//object-stream/FrequentItemSets/FrequentItemSets/default/frequentSets/com.rapidminer.operator.learner.associations.FrequentItemSet"/>
    <enumeration key="xpaths_for_attributes">
    <parameter key="xpath_for_attribute" value="Size[1]/text()"/>
    <parameter key="xpath_for_attribute" value="Support[1]/text()"/>
    <parameter key="xpath_for_attribute" value="Item1[1]/text()"/>
    <parameter key="xpath_for_attribute" value="Item2[1]/text()"/>
    <parameter key="xpath_for_attribute" value="default[1]/frequentSets[1]/attribute::id"/>
    <parameter key="xpath_for_attribute" value="default[1]/frequentSets[1]/text()"/>
    <parameter key="xpath_for_attribute" value="FrequentItemSets[1]/default[1]/frequentSets[1]/attribute::id"/>
    <parameter key="xpath_for_attribute" value="FrequentItemSets[1]/default[1]/frequentSets[1]/text()"/>
    <parameter key="xpath_for_attribute" value="frequency[1]/text()"/>
    <parameter key="xpath_for_attribute" value="items[1]/com.rapidminer.extension.concurrency.operator.learner.associations.fpgrowth.NominalItem[2]/name[1]/text()"/>
    <parameter key="xpath_for_attribute" value="items[1]/com.rapidminer.extension.concurrency.operator.learner.associations.fpgrowth.NominalItem[1]/name[1]/text()"/>
    <parameter key="xpath_for_attribute" value="items[1]/com.rapidminer.extension.concurrency.operator.learner.associations.fpgrowth.NominalItem[1]/frequency[1]/text()"/>
    <parameter key="xpath_for_attribute" value="items[1]/com.rapidminer.extension.concurrency.operator.learner.associations.fpgrowth.NominalItem[2]/frequency[1]/text()"/>
    </enumeration>
    <list key="namespaces"/>
    <parameter key="use_default_namespace" value="false"/>
    <list key="annotations"/>
    <parameter key="locale" value="German (Germany)"/>
    <list key="data_set_meta_data_information">
    <parameter key="0" value="Size[1]/text().true.polynominal.attribute"/>
    <parameter key="1" value="Support[1]/text().true.polynominal.attribute"/>
    <parameter key="2" value="Item1[1]/text().true.polynominal.attribute"/>
    <parameter key="3" value="Item2[1]/text().true.polynominal.attribute"/>
    <parameter key="4" value="default[1]/frequentSets[1]/attribute::id.true.polynominal.attribute"/>
    <parameter key="5" value="default[1]/frequentSets[1]/text().true.polynominal.attribute"/>
    <parameter key="6" value="FrequentItemSets[1]/default[1]/frequentSets[1]/attribute::id.true.polynominal.attribute"/>
    <parameter key="7" value="FrequentItemSets[1]/default[1]/frequentSets[1]/text().true.polynominal.attribute"/>
    <parameter key="8" value="frequency[1]/text().true.integer.attribute"/>
    <parameter key="9" value="items[1]/com\.rapidminer\.extension\.concurrency\.operator\.learner\.associations\.fpgrowth\.NominalItem[2]/name[1]/text().true.polynominal.attribute"/>
    <parameter key="10" value="items[1]/com\.rapidminer\.extension\.concurrency\.operator\.learner\.associations\.fpgrowth\.NominalItem[1]/name[1]/text().true.polynominal.attribute"/>
    <parameter key="11" value="items[1]/com\.rapidminer\.extension\.concurrency\.operator\.learner\.associations\.fpgrowth\.NominalItem[1]/frequency[1]/text().true.integer.attribute"/>
    <parameter key="12" value="items[1]/com\.rapidminer\.extension\.concurrency\.operator\.learner\.associations\.fpgrowth\.NominalItem[2]/frequency[1]/text().true.integer.attribute"/>
    </list>
    </operator>
    <!-- Gives the columns short names. Item2 is now taken from the name of the second
         NominalItem, mirroring how Item1 is taken from the first; it was previously mapped
         from the column "Item2[1]/text()".
         NOTE(review): "Size[1]/text()" presumably has no matching element in the written
         file and stays empty; confirm. Support is filled from frequency[1]. -->
    <operator activated="true" class="rename" compatibility="8.2.001" expanded="true" height="82" name="Rename" width="90" x="380" y="34">
    <parameter key="old_name" value="items[1]/com.rapidminer.extension.concurrency.operator.learner.associations.fpgrowth.NominalItem[1]/name[1]/text()"/>
    <parameter key="new_name" value="Item1"/>
    <list key="rename_additional_attributes">
    <parameter key="items[1]/com.rapidminer.extension.concurrency.operator.learner.associations.fpgrowth.NominalItem[2]/name[1]/text()" value="Item2"/>
    <parameter key="Size[1]/text()" value="Size"/>
    <parameter key="frequency[1]/text()" value="Support"/>
    </list>
    </operator>
    <!-- Switched-off, unconnected R script that draws the item pairs as a network and saves
         it as a PNG. edge_alpha now refers to the Support column; R is case-sensitive and
         the script never creates a lower-case "support" column.
         NOTE(review): Support is renamed from frequency[1]; check the filter threshold
         0.404 in the script against the values that column actually holds. -->
    <operator activated="false" class="r_scripting:execute_r" compatibility="8.1.000" expanded="true" height="68" name="Graph" width="90" x="514" y="34">
    <parameter key="script" value="library(dplyr)&#10;library(tidytext)&#10;library(widyr)&#10;library(ggplot2)&#10;library(igraph)&#10;library(ggraph)&#10;&#10;rm_main = function(data)&#10;{&#10;table &lt;- data_frame(Item1 = data$Item1, Item2 = data$Item2, Support = data$Support)&#10;table &lt;- as.data.frame(table)&#10; &#10;set.seed(2018)&#10;cooccurre_graph &lt;- table %&gt;%&#10;filter (Support&lt;=0.404)%&gt;%&#10; graph_from_data_frame() %&gt;%&#10; ggraph(layout =&quot;fr&quot;)+&#10; geom_edge_link(aes(edge_alpha = Support, edge_width = Support), edge_colour = &quot;darkred&quot;)+&#10; geom_node_point(size = 5) +&#10; geom_node_text(aes(label=name), repel = TRUE, point.padding = unit(0.2, &quot;lines&quot;)) +&#10; theme_void()&#10;png(&quot;//home//knecht//cooccurrece1010.png&quot;)&#10;plot(cooccurre_graph)&#10;dev.off()&#10;table &lt;- as.data.frame(table)&#10;return(list(table))&#10;}&#10;"/>
    </operator>
    <connect from_port="in 1" to_op="Write" to_port="object"/>
    <connect from_op="Read XML" from_port="output" to_op="Rename" to_port="example set input"/>
    <connect from_op="Rename" from_port="example set output" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="source_in 2" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    </operator>
    <!-- Generates association rules with a minimum confidence of 0.68. -->
    <operator name="Create Association Rules" class="create_association_rules" activated="true"
              compatibility="8.2.001" expanded="true" x="782" y="34" width="90" height="82">
      <parameter key="min_confidence" value="0.68"/>
    </operator>
    <!-- Top-level wiring: the Retrieve operator feeds FP-Growth Sub. Its "out 1" goes to
         Create Association Rules (rules to result 1, item sets to result 2); its "out 2"
         goes to Create Graph, whose "out 1" is result 3. "out 3" of FP-Growth Sub is not
         connected here. -->
    <connect from_op="Retrieve 18-07-25 Prepared for FP-growth stop word" from_port="output" to_op="FP-Growth Sub" to_port="in 1"/>
    <connect from_op="FP-Growth Sub" from_port="out 1" to_op="Create Association Rules" to_port="item sets"/>
    <connect from_op="FP-Growth Sub" from_port="out 2" to_op="Create Graph" to_port="in 1"/>
    <connect from_op="Create Graph" from_port="out 1" to_port="result 3"/>
    <connect from_op="Create Association Rules" from_port="rules" to_port="result 1"/>
    <connect from_op="Create Association Rules" from_port="item sets" to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    <portSpacing port="sink_result 4" spacing="0"/>
    </process>
    </operator>
    </process>

     

  • TobiasNehrigTobiasNehrig Member Posts: 41 Guru

    Hi,

    it's me again.

    I'm trying to sort out how it might be possible to add the item names in @bhupendra_patil's approach Writing-Association-Rules-to-Exampleset-or-file. With the old FP-Growth the approach runs and I see all columns more or less filled, but if I use the new FP-Growth instead, the item names are not shown. Does anyone have an idea how to make this possible?

     

    <?xml version="1.0" encoding="UTF-8"?><process version="8.2.001">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
    <!-- Loads the data set stored at the repository entry //Samples/data/Iris. -->
    <operator name="Iris" class="retrieve" activated="true"
              compatibility="8.2.001" expanded="true" x="45" y="120" width="90" height="68">
      <parameter key="repository_entry" value="//Samples/data/Iris"/>
    </operator>
    <!-- Frequency-based discretization into 5 bins, using the "short" range name type. -->
    <operator name="Discretize by Frequency" class="discretize_by_frequency" activated="true"
              compatibility="7.1.001" expanded="true" x="179" y="120" width="90" height="103">
      <parameter key="number_of_bins" value="5"/>
      <parameter key="range_name_type" value="short"/>
    </operator>
    <!-- Nominal-to-binominal conversion; binominal attributes are transformed as well
         (transform_binominal=true) and generated names use an underscore. -->
    <operator name="Nominal to Binominal" class="nominal_to_binominal" activated="true"
              compatibility="7.1.001" expanded="true" x="313" y="120" width="90" height="103">
      <parameter key="transform_binominal" value="true"/>
      <parameter key="use_underscore_in_name" value="true"/>
    </operator>
    <!-- Multiply operator without parameters; it provides copies of its input. -->
    <operator name="Multiply" class="multiply" activated="true"
              compatibility="8.2.001" expanded="true" x="447" y="136" width="90" height="103"/>
    <!-- New (concurrency extension) FP-Growth operator.
         NOTE(review): min_requirement is "frequency" while only min_support (0.1) is set
         explicitly; presumably min_support is not used in that mode and the default
         min_frequency applies. Confirm that this is the intended threshold. -->
    <operator name="FP-Growth" class="concurrency:fp_growth" activated="true"
              compatibility="8.2.001" expanded="true" x="581" y="442" width="90" height="82">
      <parameter key="min_requirement" value="frequency"/>
      <parameter key="min_support" value="0.1"/>
      <enumeration key="must_contain_list"/>
    </operator>
    <!-- Old FP-Growth operator (class "fp_growth"): min_support 0.1, at most 4 items per
         set, and the search for a minimum number of item sets switched off. -->
    <operator name="FPGrowth" class="fp_growth" activated="true"
              compatibility="8.2.001" expanded="true" x="581" y="136" width="90" height="82">
      <parameter key="find_min_number_of_itemsets" value="false"/>
      <parameter key="min_number_of_itemsets" value="1"/>
      <parameter key="min_support" value="0.1"/>
      <parameter key="max_items" value="4"/>
    </operator>
    <!-- Association rule generation with no explicit parameters set. -->
    <operator name="Create Association Rules" class="create_association_rules" activated="true"
              compatibility="8.2.001" expanded="true" x="782" y="238" width="90" height="82"/>
    <!-- Writes the object it receives to a fixed Windows file path, with output type XML. -->
    <operator name="Write" class="legacy:write" activated="true"
              compatibility="8.2.001" expanded="true" x="916" y="238" width="90" height="68">
      <parameter key="object_file" value="C:\!!PreSales\demos\assosciationrulesOld.ioo"/>
      <parameter key="output_type" value="XML"/>
    </operator>
    <!-- Reads the XML file assosciationrulesOld.ioo and creates one example per serialised
         AssociationRule element. Eleven attribute XPaths are extracted: seven rule measures
         plus frequency and name of the first BooleanAttributeItem in the premise and in the
         conclusion. Every column is declared with value type "attribute_value". -->
    <operator name="Read XML" class="advanced_file_connectors:read_xml" activated="true"
              compatibility="8.1.000" expanded="true" x="782" y="136" width="90" height="68">
      <parameter key="file" value="C:\!!PreSales\demos\assosciationrulesOld.ioo"/>
      <parameter key="xpath_for_examples" value="//object-stream/AssociationRules/AssociationRules/default/associationRules/com.rapidminer.operator.learner.associations.AssociationRule"/>
      <enumeration key="xpaths_for_attributes">
        <parameter key="xpath_for_attribute" value="confidence[1]/text()"/>
        <parameter key="xpath_for_attribute" value="totalSupport[1]/text()"/>
        <parameter key="xpath_for_attribute" value="lift[1]/text()"/>
        <parameter key="xpath_for_attribute" value="laplace[1]/text()"/>
        <parameter key="xpath_for_attribute" value="gain[1]/text()"/>
        <parameter key="xpath_for_attribute" value="ps[1]/text()"/>
        <parameter key="xpath_for_attribute" value="conviction[1]/text()"/>
        <parameter key="xpath_for_attribute" value="premise[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/frequency[1]/text()"/>
        <parameter key="xpath_for_attribute" value="premise[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/name[1]/text()"/>
        <parameter key="xpath_for_attribute" value="conclusion[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/frequency[1]/text()"/>
        <parameter key="xpath_for_attribute" value="conclusion[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/name[1]/text()"/>
      </enumeration>
      <list key="namespaces"/>
      <parameter key="use_default_namespace" value="false"/>
      <list key="annotations"/>
      <list key="data_set_meta_data_information">
        <parameter key="0" value="confidence[1]/text().true.attribute_value.attribute"/>
        <parameter key="1" value="totalSupport[1]/text().true.attribute_value.attribute"/>
        <parameter key="2" value="lift[1]/text().true.attribute_value.attribute"/>
        <parameter key="3" value="laplace[1]/text().true.attribute_value.attribute"/>
        <parameter key="4" value="gain[1]/text().true.attribute_value.attribute"/>
        <parameter key="5" value="ps[1]/text().true.attribute_value.attribute"/>
        <parameter key="6" value="conviction[1]/text().true.attribute_value.attribute"/>
        <parameter key="7" value="premise[1]/com\.rapidminer\.operator\.learner\.associations\.BooleanAttributeItem[1]/frequency[1]/text().true.attribute_value.attribute"/>
        <parameter key="8" value="premise[1]/com\.rapidminer\.operator\.learner\.associations\.BooleanAttributeItem[1]/name[1]/text().true.attribute_value.attribute"/>
        <parameter key="9" value="conclusion[1]/com\.rapidminer\.operator\.learner\.associations\.BooleanAttributeItem[1]/frequency[1]/text().true.attribute_value.attribute"/>
        <parameter key="10" value="conclusion[1]/com\.rapidminer\.operator\.learner\.associations\.BooleanAttributeItem[1]/name[1]/text().true.attribute_value.attribute"/>
      </list>
    </operator>
    <!-- Replaces the XPath-style column names with short names (gain, lift, laplace,
         confidence, conviction, ps, total support, and name/frequency of premise and conclusion).
         NOTE(review): the target name "conlucison_name" looks like a misspelling of
         "conclusion_name"; it is kept as is because operators that consume this output
         may refer to it. Confirm before renaming. -->
    <operator name="Rename" class="rename" activated="true"
              compatibility="8.2.001" expanded="true" x="916" y="136" width="90" height="82">
      <parameter key="old_name" value="gain[1]/text()"/>
      <parameter key="new_name" value="gain"/>
      <list key="rename_additional_attributes">
        <parameter key="lift[1]/text()" value="lift"/>
        <parameter key="laplace[1]/text()" value="laplace"/>
        <parameter key="confidence[1]/text()" value="confidence"/>
        <parameter key="conviction[1]/text()" value="conviction"/>
        <parameter key="ps[1]/text()" value="ps"/>
        <parameter key="totalSupport[1]/text()" value="total support"/>
        <parameter key="conclusion[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/frequency[1]/text()" value="conclusion_frequency"/>
        <parameter key="conclusion[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/name[1]/text()" value="conlucison_name"/>
        <parameter key="premise[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/frequency[1]/text()" value="premise_frequency"/>
        <parameter key="premise[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/name[1]/text()" value="premise_name"/>
      </list>
    </operator>
    <!-- Read XML (2): builds an example set from the XML file named in "file".
         xpath_for_examples selects the AssociationRule elements; xpaths_for_attributes
         lists eleven XPath expressions, in the same order as entries 0 to 10 of
         data_set_meta_data_information. In the meta data entries the dots of the
         BooleanAttributeItem class name are written with a backslash; presumably this is
         because the dot also separates the fields of each entry (TODO confirm).
         NOTE(review): "file" is the same path that "Write (2)" uses as object_file, and no
         connection links the two operators, so the read may see a file from an earlier
         run; confirm the intended execution order. -->
    <operator activated="true"
              class="advanced_file_connectors:read_xml"
              compatibility="8.1.000"
              expanded="true"
              height="68"
              name="Read XML (2)"
              width="90"
              x="782"
              y="544">
      <parameter key="file" value="C:\!!PreSales\demos\assosciationrulesNew.ioo"/>
      <parameter key="xpath_for_examples"
                 value="//object-stream/AssociationRules/AssociationRules/default/associationRules/com.rapidminer.operator.learner.associations.AssociationRule"/>
      <enumeration key="xpaths_for_attributes">
        <parameter key="xpath_for_attribute" value="confidence[1]/text()"/>
        <parameter key="xpath_for_attribute" value="totalSupport[1]/text()"/>
        <parameter key="xpath_for_attribute" value="lift[1]/text()"/>
        <parameter key="xpath_for_attribute" value="laplace[1]/text()"/>
        <parameter key="xpath_for_attribute" value="gain[1]/text()"/>
        <parameter key="xpath_for_attribute" value="ps[1]/text()"/>
        <parameter key="xpath_for_attribute" value="conviction[1]/text()"/>
        <parameter key="xpath_for_attribute"
                   value="premise[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/frequency[1]/text()"/>
        <parameter key="xpath_for_attribute"
                   value="premise[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/name[1]/text()"/>
        <parameter key="xpath_for_attribute"
                   value="conclusion[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/frequency[1]/text()"/>
        <parameter key="xpath_for_attribute"
                   value="conclusion[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/name[1]/text()"/>
      </enumeration>
      <list key="namespaces"/>
      <parameter key="use_default_namespace" value="false"/>
      <list key="annotations"/>
      <list key="data_set_meta_data_information">
        <parameter key="0" value="confidence[1]/text().true.attribute_value.attribute"/>
        <parameter key="1" value="totalSupport[1]/text().true.attribute_value.attribute"/>
        <parameter key="2" value="lift[1]/text().true.attribute_value.attribute"/>
        <parameter key="3" value="laplace[1]/text().true.attribute_value.attribute"/>
        <parameter key="4" value="gain[1]/text().true.attribute_value.attribute"/>
        <parameter key="5" value="ps[1]/text().true.attribute_value.attribute"/>
        <parameter key="6" value="conviction[1]/text().true.attribute_value.attribute"/>
        <parameter key="7"
                   value="premise[1]/com\.rapidminer\.operator\.learner\.associations\.BooleanAttributeItem[1]/frequency[1]/text().true.attribute_value.attribute"/>
        <parameter key="8"
                   value="premise[1]/com\.rapidminer\.operator\.learner\.associations\.BooleanAttributeItem[1]/name[1]/text().true.attribute_value.attribute"/>
        <parameter key="9"
                   value="conclusion[1]/com\.rapidminer\.operator\.learner\.associations\.BooleanAttributeItem[1]/frequency[1]/text().true.attribute_value.attribute"/>
        <parameter key="10"
                   value="conclusion[1]/com\.rapidminer\.operator\.learner\.associations\.BooleanAttributeItem[1]/name[1]/text().true.attribute_value.attribute"/>
      </list>
    </operator>
    <!-- Rename (2): gives the attributes produced by "Read XML (2)" short, readable names.
         Every old name used here (old_name plus the keys of rename_additional_attributes)
         is one of the xpath_for_attribute expressions configured on "Read XML (2)".
         old_name/new_name hold the first pair; the list holds the remaining pairs
         (key = current attribute name, value = new attribute name).
         The premise/conclusion attributes follow one pattern: <side>_frequency, <side>_name. -->
    <operator activated="true" class="rename" compatibility="8.2.001" expanded="true" height="82" name="Rename (2)" width="90" x="916" y="544">
      <parameter key="old_name" value="gain[1]/text()"/>
      <parameter key="new_name" value="gain"/>
      <list key="rename_additional_attributes">
        <parameter key="lift[1]/text()" value="lift"/>
        <parameter key="laplace[1]/text()" value="laplace"/>
        <parameter key="confidence[1]/text()" value="confidence"/>
        <parameter key="conviction[1]/text()" value="conviction"/>
        <parameter key="ps[1]/text()" value="ps"/>
        <parameter key="totalSupport[1]/text()" value="total support"/>
        <parameter key="conclusion[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/frequency[1]/text()" value="conclusion_frequency"/>
        <parameter key="conclusion[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/name[1]/text()" value="conclusion_name"/>
        <parameter key="premise[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/frequency[1]/text()" value="premise_frequency"/>
        <parameter key="premise[1]/com.rapidminer.operator.learner.associations.BooleanAttributeItem[1]/name[1]/text()" value="premise_name"/>
      </list>
    </operator>
    <!-- Create Association Rules (2): receives the "frequent sets" output of "FP-Growth" on
         its "item sets" port and hands its "rules" output to "Write (2)" (see the connect
         list of this process). No parameters are set explicitly on this operator. -->
    <operator activated="true"
              class="create_association_rules"
              compatibility="8.2.001"
              expanded="true"
              height="82"
              name="Create Association Rules (2)"
              width="90"
              x="782"
              y="442"/>
    <!-- Write (2): stores the object arriving on its "object" port (the rules from
         "Create Association Rules (2)") in the file given by object_file, with
         output_type set to XML. "Read XML (2)" is configured with the same file path. -->
    <operator activated="true"
              class="legacy:write"
              compatibility="8.2.001"
              expanded="true"
              height="68"
              name="Write (2)"
              width="90"
              x="916"
              y="442">
      <parameter key="object_file" value="C:\!!PreSales\demos\assosciationrulesNew.ioo"/>
      <parameter key="output_type" value="XML"/>
    </operator>
    <!-- Wiring of the process. Data preparation chain:
         Iris, then Discretize by Frequency, then Nominal to Binominal, then Multiply. -->
    <connect from_op="Iris" from_port="output" to_op="Discretize by Frequency" to_port="example set input"/>
    <connect from_op="Discretize by Frequency" from_port="example set output" to_op="Nominal to Binominal" to_port="example set input"/>
    <connect from_op="Nominal to Binominal" from_port="example set output" to_op="Multiply" to_port="input"/>
    <!-- Multiply feeds two separate operators, named "FPGrowth" and "FP-Growth". -->
    <connect from_op="Multiply" from_port="output 1" to_op="FPGrowth" to_port="example set"/>
    <connect from_op="Multiply" from_port="output 2" to_op="FP-Growth" to_port="example set"/>
    <!-- Each of them passes its "frequent sets" to its own Create Association Rules operator. -->
    <connect from_op="FP-Growth" from_port="frequent sets" to_op="Create Association Rules (2)" to_port="item sets"/>
    <connect from_op="FPGrowth" from_port="frequent sets" to_op="Create Association Rules" to_port="item sets"/>
    <!-- The rules of the first branch go to "Write". -->
    <connect from_op="Create Association Rules" from_port="rules" to_op="Write" to_port="object"/>
    <!-- The two Read XML / Rename pairs deliver the process results 1 and 2.
         NOTE(review): no connection in this list links "Write" or "Write (2)" to
         "Read XML" or "Read XML (2)"; whether a file is written before it is read within
         one run therefore depends on the operator execution order. Confirm. -->
    <connect from_op="Read XML" from_port="output" to_op="Rename" to_port="example set input"/>
    <connect from_op="Rename" from_port="example set output" to_port="result 1"/>
    <connect from_op="Read XML (2)" from_port="output" to_op="Rename (2)" to_port="example set input"/>
    <connect from_op="Rename (2)" from_port="example set output" to_port="result 2"/>
    <!-- The rules of the second branch go to "Write (2)". -->
    <connect from_op="Create Association Rules (2)" from_port="rules" to_op="Write (2)" to_port="object"/>
    <!-- Spacing values for the process-level ports; presumably layout information for the
         process view only (TODO confirm). -->
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="90"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    </process>
    </operator>
    </process>

     

    If I use this approach in my process, then I see all the numerical values but no item names.

     

    best regards

    Tobias

     

Sign In or Register to comment.