The RapidMiner community is on read-only mode until further notice. Technical support via cases will continue to work as is. For any urgent licensing related requests from Students/Faculty members, please use the Altair academic forum here.
Support-Agglomerative clustering performance [SOLVED]
I am struggling with clustering processes, and more specifically with the agglomerative algorithm.
I have tried all the clustering performance operators available in the core Studio, and none of them seems to be compatible with hierarchical clustering... I always get errors.
I suppose that I have to combine some other operators to get it to work, but I am a beginner and I need help.
Any suggestions?
My process is really simple:
I am struggling with clustering processes, and more specifically with the agglomerative algorithm.
I have tried all the clustering performance operators available in the core Studio, and none of them seems to be compatible with hierarchical clustering... I always get errors.
I suppose that I have to combine some other operators to get it to work, but I am a beginner and I need help.
Any suggestions?
My process is really simple:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Thank you in advance -->
<!--
  RapidMiner process (version 5.3.015) as posted in the thread.
  Flow: Process Documents from Files, then Clustering (agglomerative),
  then Map Clustering on Labels, whose outputs go to result 1 and result 2.
  The end tags were missing from the paste and are restored here so the
  document is well-formed; every operator, parameter and connection is
  unchanged.
-->
<process version="5.3.015">
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <process expanded="true">
      <!-- Reads documents from five directories; each list key names the directory it points to. -->
      <operator activated="true" class="text:process_document_from_file" compatibility="5.3.002" expanded="true" height="76" name="Process Documents from Files" width="90" x="45" y="30">
        <list key="text_directories">
          <parameter key="business" value="C:\Users\vassiliki\Desktop\bbc-fulltext\business"/>
          <parameter key="entertainment" value="C:\Users\vassiliki\Desktop\bbc-fulltext\entertainment"/>
          <parameter key="politics" value="C:\Users\vassiliki\Desktop\bbc-fulltext\politics"/>
          <parameter key="sport" value="C:\Users\vassiliki\Desktop\bbc-fulltext\sport"/>
          <parameter key="tech" value="C:\Users\vassiliki\Desktop\bbc-fulltext\tech"/>
        </list>
        <!-- Pruning is configured as "percentual" with prune_above_percent set to 80.0. -->
        <parameter key="prune_method" value="percentual"/>
        <parameter key="prune_above_percent" value="80.0"/>
        <!-- Per-document pipeline: Tokenize, Filter Stopwords (English), Stem (Snowball), Transform Cases. -->
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="5.3.002" expanded="true" height="60" name="Tokenize" width="90" x="45" y="30"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="5.3.002" expanded="true" height="60" name="Filter Stopwords (English)" width="90" x="179" y="30"/>
          <operator activated="true" class="text:stem_snowball" compatibility="5.3.002" expanded="true" height="60" name="Stem (Snowball)" width="90" x="313" y="30"/>
          <operator activated="true" class="text:transform_cases" compatibility="5.3.002" expanded="true" height="60" name="Transform Cases" width="90" x="447" y="30"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Snowball)" to_port="document"/>
          <connect from_op="Stem (Snowball)" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Agglomerative clustering with mode AverageLink and measure type NumericalMeasures. -->
      <operator activated="true" class="agglomerative_clustering" compatibility="5.3.015" expanded="true" height="76" name="Clustering" width="90" x="246" y="30">
        <parameter key="mode" value="AverageLink"/>
        <parameter key="measure_types" value="NumericalMeasures"/>
      </operator>
      <!--
        NOTE(review): the poster reports errors with this setup. Presumably the
        hierarchical model from agglomerative clustering has to be converted to a
        flat clustering before this operator can use it; confirm against the
        operator documentation. The wiring is left exactly as posted.
      -->
      <operator activated="true" class="map_clustering_on_labels" compatibility="5.3.015" expanded="true" height="76" name="Map Clustering on Labels" width="90" x="447" y="30"/>
      <connect from_op="Process Documents from Files" from_port="example set" to_op="Clustering" to_port="example set"/>
      <connect from_op="Clustering" from_port="cluster model" to_op="Map Clustering on Labels" to_port="cluster model"/>
      <connect from_op="Clustering" from_port="example set" to_op="Map Clustering on Labels" to_port="example set"/>
      <connect from_op="Map Clustering on Labels" from_port="example set" to_port="result 1"/>
      <connect from_op="Map Clustering on Labels" from_port="cluster model" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
What do you want to do in the first place? For a simple agglomerative clustering, try this:
Dortmund, Germany