Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
LDAEvaluationWordNet - wrong connection
Dear all
I am new to the community and also new to this field, so I apologize in advance for any dumb questions.
I am trying to run LDAEvaluationWordNet on my text mining data. However, it always shows the error "Your connection is producing the wrong type of data..." (please refer to the XML code below).
Can someone help me with that? Thank you!
I am new to the community and also new to this field, so I apologize in advance for any dumb questions.
I am trying to run LDAEvaluationWordNet on my text mining data. However, it always shows the error "Your connection is producing the wrong type of data..." (please refer to the XML code below).
Can someone help me with that? Thank you!
<?xml version="1.0" encoding="UTF-8"?><process version="9.5.001">
<!-- RapidMiner process definition (process version 9.5.001). -->
<!-- Process context: no inputs, outputs, or macros are declared. -->
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Root "Process" operator; the parameters directly below are its own settings. -->
<operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<!-- Top-level subprocess: contains the operators and the connections between them. -->
<process expanded="true">
<!--
  Loop Files (2): runs its inner process for the files in "directory" whose
  names match "filter_by_regex". Inside the loop each file is read as a
  document, turned into a TF-IDF word vector, and the resulting word list is
  converted to an ExampleSet that is delivered on "output 1".
-->
<operator activated="true" class="concurrency:loop_files" compatibility="9.5.001" expanded="true" height="82" name="Loop Files (2)" width="90" x="112" y="34">
  <parameter key="directory" value="C:\Users\Kevin\Desktop\REF PDF"/>
  <parameter key="filter_type" value="regex"/>
  <!-- The dot in front of "pdf" is escaped so it matches a literal "."; left unescaped it matches any character (for example a name ending in "xpdf"). -->
  <parameter key="filter_by_regex" value=".*\.pdf"/>
  <parameter key="recursive" value="false"/>
  <parameter key="enable_macros" value="true"/>
  <parameter key="macro_for_file_name" value="file_name"/>
  <parameter key="macro_for_file_type" value="file_type"/>
  <parameter key="macro_for_folder_name" value="folder_name"/>
  <parameter key="reuse_results" value="false"/>
  <parameter key="enable_parallel_execution" value="true"/>
  <process expanded="true">
    <!-- Reads the current file (connected from the "file object" port) as a document. -->
    <operator activated="true" class="text:read_document" compatibility="8.2.000" expanded="true" height="68" name="Read Document" width="90" x="45" y="34">
      <parameter key="extract_text_only" value="true"/>
      <parameter key="use_file_extension_as_type" value="true"/>
      <parameter key="content_type" value="pdf"/>
      <parameter key="encoding" value="SYSTEM"/>
    </operator>
    <!-- Builds a TF-IDF word vector; prune_method is "none", so presumably the prune_* thresholds below are not applied (confirm). -->
    <operator activated="true" class="text:process_documents" compatibility="8.2.000" expanded="true" height="103" name="Process Documents" width="90" x="246" y="34">
      <parameter key="create_word_vector" value="true"/>
      <parameter key="vector_creation" value="TF-IDF"/>
      <parameter key="add_meta_information" value="true"/>
      <parameter key="keep_text" value="false"/>
      <parameter key="prune_method" value="none"/>
      <parameter key="prune_below_percent" value="3.0"/>
      <parameter key="prune_above_percent" value="30.0"/>
      <parameter key="prune_below_absolute" value="2"/>
      <parameter key="prune_above_absolute" value="9999"/>
      <parameter key="prune_below_rank" value="5.0"/>
      <parameter key="prune_above_rank" value="5.0"/>
      <parameter key="datamanagement" value="double_sparse_array"/>
      <parameter key="data_management" value="auto"/>
      <process expanded="true">
        <!-- Token pipeline: Tokenize, then Stem (Porter), then Filter Stopwords (English). -->
        <!-- NOTE(review): stemming runs before stop-word filtering; a Porter stem such as "thi" (from "this") may no longer match the stop-word list. Consider filtering stop words first. -->
        <operator activated="true" class="text:tokenize" compatibility="8.2.000" expanded="true" height="68" name="Tokenize" width="90" x="179" y="136">
          <parameter key="mode" value="non letters"/>
          <parameter key="characters" value=".:"/>
          <parameter key="language" value="English"/>
          <parameter key="max_token_length" value="3"/>
        </operator>
        <operator activated="true" class="text:stem_porter" compatibility="8.2.000" expanded="true" height="68" name="Stem (Porter)" width="90" x="447" y="136"/>
        <operator activated="true" class="text:filter_stopwords_english" compatibility="8.2.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="648" y="136"/>
        <connect from_port="document" to_op="Tokenize" to_port="document"/>
        <connect from_op="Tokenize" from_port="document" to_op="Stem (Porter)" to_port="document"/>
        <connect from_op="Stem (Porter)" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
        <connect from_op="Filter Stopwords (English)" from_port="document" to_port="document 1"/>
        <portSpacing port="source_document" spacing="0"/>
        <portSpacing port="sink_document 1" spacing="0"/>
        <portSpacing port="sink_document 2" spacing="0"/>
      </process>
    </operator>
    <!-- Converts the word list produced by Process Documents into an ExampleSet. -->
    <operator activated="true" class="text:wordlist_to_data" compatibility="8.2.000" expanded="true" height="82" name="WordList to Data" width="90" x="447" y="34"/>
    <connect from_port="file object" to_op="Read Document" to_port="file"/>
    <connect from_op="Read Document" from_port="output" to_op="Process Documents" to_port="documents 1"/>
    <connect from_op="Process Documents" from_port="word list" to_op="WordList to Data" to_port="word list"/>
    <connect from_op="WordList to Data" from_port="example set" to_port="output 1"/>
    <portSpacing port="source_file object" spacing="0"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
</operator>
<!--
  Union Append: subprocess that takes the collection arriving on "in 1",
  loops over it while accumulating a union under the stored name "LoopData",
  and finally selects one element of the loop's output collection for "out 1".
-->
<operator activated="true" class="subprocess" compatibility="9.5.001"
          expanded="true" name="Union Append"
          height="82" width="90" x="246" y="34">
  <process expanded="true">

    <!-- Loop over the collection; the macro "iteration" is set per pass, starting at 1. -->
    <operator activated="true" class="loop_collection" compatibility="9.5.001"
              expanded="true" name="Output (4)"
              height="82" width="90" x="45" y="34">
      <parameter key="set_iteration_macro" value="true"/>
      <parameter key="macro_name" value="iteration"/>
      <parameter key="macro_start_value" value="1"/>
      <parameter key="unfold" value="false"/>
      <process expanded="true">

        <!-- Deactivated and not referenced by any connection in this subprocess. -->
        <operator activated="false" breakpoints="after" class="select" compatibility="9.5.001"
                  expanded="true" name="Select (5)"
                  height="68" width="90" x="112" y="34">
          <parameter key="index" value="%{iteration}"/>
          <parameter key="unfold" value="false"/>
        </operator>

        <!-- Branches on the expression %{iteration}==1. -->
        <operator activated="true" class="branch" compatibility="9.5.001"
                  expanded="true" name="Branch (2)"
                  height="82" width="90" x="313" y="34">
          <parameter key="condition_type" value="expression"/>
          <parameter key="expression" value="%{iteration}==1"/>
          <parameter key="io_object" value="ANOVAMatrix"/>
          <parameter key="return_inner_output" value="true"/>

          <!-- First subprocess (presumably taken when the expression holds): passes the input straight through. -->
          <process expanded="true">
            <connect from_port="condition" to_port="input 1"/>
            <portSpacing port="source_condition" spacing="0"/>
            <portSpacing port="source_input 1" spacing="0"/>
            <portSpacing port="sink_input 1" spacing="0"/>
            <portSpacing port="sink_input 2" spacing="0"/>
          </process>

          <!-- Second subprocess (presumably the "else" case): unions the input with the recalled "LoopData". -->
          <process expanded="true">
            <operator activated="true" class="recall" compatibility="9.5.001"
                      expanded="true" name="Recall (5)"
                      height="68" width="90" x="45" y="187">
              <parameter key="name" value="LoopData"/>
              <parameter key="io_object" value="ExampleSet"/>
              <parameter key="remove_from_store" value="true"/>
            </operator>
            <operator activated="true" class="union" compatibility="9.5.001"
                      expanded="true" name="Union (2)"
                      height="82" width="90" x="179" y="34"/>
            <connect from_port="condition" to_op="Union (2)" to_port="example set 1"/>
            <connect from_op="Recall (5)" from_port="result" to_op="Union (2)" to_port="example set 2"/>
            <connect from_op="Union (2)" from_port="union" to_port="input 1"/>
            <portSpacing port="source_condition" spacing="0"/>
            <portSpacing port="source_input 1" spacing="0"/>
            <portSpacing port="sink_input 1" spacing="0"/>
            <portSpacing port="sink_input 2" spacing="0"/>
          </process>
        </operator>

        <!-- Stores the branch result under the name "LoopData" so the next pass can recall it. -->
        <operator activated="true" class="remember" compatibility="9.5.001"
                  expanded="true" name="Remember (5)"
                  height="68" width="90" x="581" y="34">
          <parameter key="name" value="LoopData"/>
          <parameter key="io_object" value="ExampleSet"/>
          <parameter key="store_which" value="1"/>
          <parameter key="remove_from_process" value="true"/>
        </operator>

        <connect from_port="single" to_op="Branch (2)" to_port="condition"/>
        <connect from_op="Branch (2)" from_port="input 1" to_op="Remember (5)" to_port="store"/>
        <connect from_op="Remember (5)" from_port="stored" to_port="output 1"/>
        <portSpacing port="source_single" spacing="0"/>
        <portSpacing port="sink_output 1" spacing="0"/>
        <portSpacing port="sink_output 2" spacing="0"/>
      </process>
    </operator>

    <!-- Picks the element at index %{iteration} from the loop's output collection. -->
    <operator activated="true" class="select" compatibility="9.5.001"
              expanded="true" name="Select (6)"
              height="68" width="90" x="179" y="34">
      <parameter key="index" value="%{iteration}"/>
      <parameter key="unfold" value="false"/>
    </operator>

    <connect from_port="in 1" to_op="Output (4)" to_port="collection"/>
    <connect from_op="Output (4)" from_port="output 1" to_op="Select (6)" to_port="collection"/>
    <connect from_op="Select (6)" from_port="selected" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="source_in 2" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
  </process>
</operator>
<!-- Opens the WordNet dictionary from a local directory. Its "dictionary" output is left unconnected (see the wiring note further down). -->
<operator activated="true" class="wordnet:open_wordnet_dictionary" compatibility="5.3.000" expanded="true" height="68" name="Open WordNet Dictionary" width="90" x="380" y="136">
  <parameter key="resource_type" value="directory"/>
  <parameter key="directory" value="C:\Users\Kevin\Desktop\WordNet-3.0\dict"/>
</operator>
<!--
  LDAEvaluationWordNet (Corpus Linguistics LDA extension): takes its WordNet
  location from the "word net source files" parameter, not from an input port.
  The previous value, /home/poelitz/Downloads/WordNet-2.1/dict, is a Linux home
  directory path, while every other path in this process is under
  C:\Users\Kevin, so the parameter now points at the same WordNet directory as
  Open WordNet Dictionary.
  NOTE(review): the old path referenced WordNet 2.1; confirm with the extension
  author that WordNet 3.0 is supported.
-->
<operator activated="true" class="corpus_linguistics_plugin_LDA:LDAEvaluationWordNet" compatibility="1.1.001" expanded="true" height="82" name="LDAEvaluationWordNet" width="90" x="380" y="34">
  <parameter key="iterations" value="2000"/>
  <parameter key="word net source files" value="C:\Users\Kevin\Desktop\WordNet-3.0\dict"/>
  <parameter key="method" value="WuPalmer"/>
</operator>
<connect from_op="Loop Files (2)" from_port="output 1" to_op="Union Append" to_port="in 1"/>
<connect from_op="Union Append" from_port="out 1" to_op="LDAEvaluationWordNet" to_port="example set input"/>
<!--
  Wiring note: the connection from Open WordNet Dictionary ("dictionary") to
  LDAEvaluationWordNet ("example set words") was removed. That port is named
  for an ExampleSet, whereas the source delivers a WordNet dictionary object,
  which is the "wrong type of data" error reported for this process.
  TODO(review): if the operator requires "example set words", connect an
  ExampleSet of words to it; confirm the expected input with the extension author.
-->
<connect from_op="LDAEvaluationWordNet" from_port="output neg log likelihoods" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
0
Best Answer
-
sgenzer Administrator, Moderator, Employee, RapidMiner Certified Analyst, Community Manager, Member, University Professor, PM Moderator Posts: 2,959 Community Manager
Hi @Ka13n, hmm, the operator causing you trouble is from the "Corpus Linguistics LDA" extension out of TU Dortmund. To be honest, I had never heard of it until this post. I would recommend going to the website and asking the author about your problem. His email is posted: https://www-ai.cs.tu-dortmund.de/PERSONAL/poelitz.html
Scott5
Answers
Thanks for your kind reply, I'll shoot him an email for the solution!