Options

network connection with Get Pages - operator

minerminer Member Posts: 13 Contributor II
edited November 2018 in Help

Hi there,

 

for a test I created a list of 9 URLs in an Excel sheet.

Now I´m trying to test the following process

 

Read Excel > Get Pages > Data to Documents > further processing...

 

When I set a breakpoint after Read Excel I get an example set of the 9 URLs.

As soon as I try Get Pages the result is an error saying "Could not connect to the specified URL. Please check your network connection."

Here is my process:

<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.003">
  <!-- Chain: Read Excel, Get Pages, Data to Documents, Process Documents. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- Imports cell range A1:A9 as attribute A; a breakpoint is set after this operator. -->
      <operator activated="true" breakpoints="after" class="read_excel" compatibility="7.5.003" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
        <parameter key="excel_file" value="C:\Users\xxx\Desktop\Crawler\test_url.xls"/>
        <parameter key="imported_cell_range" value="A1:A9"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations"/>
        <parameter key="locale" value="German (Germany)"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="A.true.file_path.label"/>
        </list>
      </operator>
      <!-- Uses attribute A as the link attribute. NOTE(review): per the accepted answer in this
           thread, the values presumably need an explicit http:// or https:// prefix; confirm. -->
      <operator activated="true" class="web:retrieve_webpages" compatibility="7.3.000" expanded="true" height="68" name="Get Pages" width="90" x="179" y="136">
        <parameter key="link_attribute" value="A"/>
        <parameter key="page_attribute" value="*"/>
        <parameter key="user_agent" value="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"/>
        <parameter key="connection_timeout" value="100000"/>
        <parameter key="read_timeout" value="100000"/>
        <parameter key="accept_cookies" value="all"/>
        <parameter key="delay" value="random"/>
        <parameter key="min_delay_amount" value="200"/>
      </operator>
      <operator activated="true" class="text:data_to_documents" compatibility="7.5.000" expanded="true" height="68" name="Data to Documents" width="90" x="313" y="34">
        <list key="specify_weights"/>
      </operator>
      <operator activated="true" class="text:process_documents" compatibility="7.5.000" expanded="true" height="103" name="Process Documents" width="90" x="447" y="85">
        <process expanded="true">
          <!-- Inner chain, in connection order: Extract Content, Tokenize, Filter Stopwords (German),
               Stem (Snowball), Transform Cases, Generate n-Grams (Characters). -->
          <operator activated="true" class="web:extract_html_text_content" compatibility="7.3.000" expanded="true" height="68" name="Extract Content" width="90" x="45" y="34"/>
          <operator activated="true" class="text:tokenize" compatibility="7.5.000" expanded="true" height="68" name="Tokenize" width="90" x="179" y="34"/>
          <operator activated="true" class="text:filter_stopwords_german" compatibility="7.5.000" expanded="true" height="68" name="Filter Stopwords (German)" width="90" x="45" y="136"/>
          <operator activated="true" class="text:stem_snowball" compatibility="7.5.000" expanded="true" height="68" name="Stem (Snowball)" width="90" x="179" y="136"/>
          <operator activated="true" class="text:transform_cases" compatibility="7.5.000" expanded="true" height="68" name="Transform Cases" width="90" x="45" y="238"/>
          <operator activated="true" class="text:generate_n_grams_characters" compatibility="7.5.000" expanded="true" height="68" name="Generate n-Grams (Characters)" width="90" x="179" y="238">
            <parameter key="keep_terms" value="true"/>
          </operator>
          <connect from_port="document" to_op="Extract Content" to_port="document"/>
          <connect from_op="Extract Content" from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (German)" to_port="document"/>
          <connect from_op="Filter Stopwords (German)" from_port="document" to_op="Stem (Snowball)" to_port="document"/>
          <connect from_op="Stem (Snowball)" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Generate n-Grams (Characters)" to_port="document"/>
          <connect from_op="Generate n-Grams (Characters)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Read Excel" from_port="output" to_op="Get Pages" to_port="Example Set"/>
      <connect from_op="Get Pages" from_port="Example Set" to_op="Data to Documents" to_port="example set"/>
      <connect from_op="Data to Documents" from_port="documents" to_op="Process Documents" to_port="documents 1"/>
      <connect from_op="Process Documents" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

My network connection works fine. I tested with the Crawl Web - operator and this works fine.

I already changed connection timeout and read timeout parameter but with no effect.

Any ideas what the reason for this error could be?

 

Thanks

miner

Best Answer

  • Options
    lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn
    Solution Accepted

    Hi @miner

     

    It seems that the web address has to begin with http:// or https:// and not with www.

    I made the tests with www.google.fr and indeed i have the same error as you, 

    on the other hand, with https://www.google.fr/ no error occurs.

    My process : 

    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.0.001">
      <!-- Chain: Read Excel, Get Pages, Data to Documents, Documents to Data. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
        <process expanded="true">
          <!-- Imports cell range A1:A3; the meta data entry names the column web_adress. -->
          <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel" width="90" x="112" y="34">
            <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Get_pages\Get_pages.xlsx"/>
            <parameter key="imported_cell_range" value="A1:A3"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="web_adress.true.polynominal.attribute"/>
            </list>
          </operator>
          <!-- Uses the web_adress attribute as the link attribute; no other parameters are set here. -->
          <operator activated="true" class="web:retrieve_webpages" compatibility="7.3.000" expanded="true" height="68" name="Get Pages" width="90" x="313" y="34">
            <parameter key="link_attribute" value="web_adress"/>
          </operator>
          <operator activated="true" class="text:data_to_documents" compatibility="7.5.000" expanded="true" height="68" name="Data to Documents" width="90" x="447" y="34">
            <list key="specify_weights"/>
          </operator>
          <operator activated="true" class="text:documents_to_data" compatibility="7.5.000" expanded="true" height="82" name="Documents to Data" width="90" x="648" y="34">
            <parameter key="text_attribute" value="Text"/>
          </operator>
          <connect from_op="Read Excel" from_port="output" to_op="Get Pages" to_port="Example Set"/>
          <connect from_op="Get Pages" from_port="Example Set" to_op="Data to Documents" to_port="example set"/>
          <connect from_op="Data to Documents" from_port="documents" to_op="Documents to Data" to_port="documents 1"/>
          <connect from_op="Documents to Data" from_port="example set" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

    Regards,

     

    Lionel

Answers

  • Options
    minerminer Member Posts: 13 Contributor II

    Hi Lionel,

     

    it is surprisingly as simple as that - thank you.

    google.fr in Excel (which would be treated as http://google.fr in an ordinary browser) doesn´t work either - I tested that, so the Get Pages operator needs a complete URL including the protocol for its GET request.

     

    Just another question: Some of the Response-Codes are 200 = OK but there are some delivering 301 = moved permanently.

    I put in my own website for testing as well and this is permanently moved from http:// to https://

    So I tried a workaround with an if-statement:

    if ( Response_Code == (301) ,

        if ( starts (A,"http:"), replace (A,"http:","https:"),

           )
           
        )

    But now RM says: inadmissible input at ")"

    I have 2 arguments instead of 3 because the then-action would be no-action.

    Do you know the placeholder in RM for no-action in if-statements so that I can go for:

    if Response_Code==301, replace http with https, otherwise do nothing.

    Thanks

    miner

     

  • Options
    lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn

    Hi again @miner,

     

    Try this two possible solutions : 

     

     - if (starts (A,"http:"), replace (A,"http:","https:"), "")

     

    or

     

     - if (starts (A,"http:"), replace (A,"http:","https:"), replace (A,"https:","https:"))

     

    I hope it helps

     

    Regards,

     

    Lionel
           
        

  • Options
    lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn

    Hi again @miner,

     

    the first solution of my previous topic is stupid

     

    Maybe the second is working

     

    but you can try this more logical solution : 

     

    if (starts (A,"http:"), replace (A,"http:","https:"), A ) 

     

    Regards, 

     

    Lionel

     

  • Options
    Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn

    Without checking out your process @lionelderkrikor, you could consider using a Handle Exception operator. So if http:// doesn't work, you could just try https://

  • Options
    lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn

    Hi @Thomas_Ott,

     

    I didn't know this operator (Handle Exception).

    I have noted it down. I think indeed that it will be useful.

     

     

    I tested a lot of combinations (loop side try/ loop side catch...)

    and in fine I can not manage to the desired conclusion in this special case study ("https" to all example sets) .

     

    I'm sure that i have missed/forget something, so can you please detail as it works in this special case study or equivalent case study: 

    For my DataScience and Rapidminer culture, can you provide a process with this operator please (I provide a link to an excel file) :

     

    https://drive.google.com/open?id=1GGP_J5VUdBje0jlquu5I_21Dk1s_KXAg

     

    Thank you, Thomas, for giving me a little time,

     

    Best regards, 

     

    Lionel

     

     

     

     

     

     

     

  • Options
    minerminer Member Posts: 13 Contributor II

    Hi again,

     

    @lionelderkrikor thank you for your suggestion.

    I tried your if-statement but the isolated statement just changes http to https for every record. That causes the same problem the other way round which is an error on GET https where the website is only on http.

    So I just need the new attribute for the records that throw a 301.

    I tried again with this:

    if (("Response-Code") == (301),
    if ( starts (A,"http:"), replace (A,"http:","https:"),A)
    ,"")

    which leaves the new attribute blank for all the records.

    So I would also be interested in a process of @Thomas_Ott using the Handle Exception - operator.

    To make it a little more complicated: I came across four different URL-notations:

    http://abc.com

    https://abc.com

    http://www.abc.com

    https://www.abc.com

    Is this '4-peace-exception' something the Handle Exception - operator is able to manage?

     

    Thanks

    miner

  • Options
    Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn

    @lionelderkrikor I was thinking of a setup like this:

     

    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.0.001">
      <!-- Only Handle Exception is active; the four operators with activated="false" are switched off. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
        <process expanded="true">
          <operator activated="false" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel" width="90" x="45" y="289">
            <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Get_pages\Get_pages.xlsx"/>
            <parameter key="imported_cell_range" value="A1:A3"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="web_adress.true.polynominal.attribute"/>
            </list>
          </operator>
          <operator activated="false" class="web:retrieve_webpages" compatibility="7.3.000" expanded="true" height="68" name="Get Pages" width="90" x="179" y="289">
            <parameter key="link_attribute" value="web_adress"/>
          </operator>
          <operator activated="false" class="text:data_to_documents" compatibility="7.5.000" expanded="true" height="68" name="Data to Documents" width="90" x="313" y="289">
            <list key="specify_weights"/>
          </operator>
          <operator activated="false" class="text:documents_to_data" compatibility="7.5.000" expanded="true" height="82" name="Documents to Data" width="90" x="514" y="289">
            <parameter key="text_attribute" value="Text"/>
          </operator>
          <operator activated="true" class="handle_exception" compatibility="8.0.001" expanded="true" height="82" name="Handle Exception" width="90" x="246" y="34">
            <!-- First subprocess (described in the thread as the "try" side): fetch the http:// URL. -->
            <process expanded="true">
              <operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page" width="90" x="45" y="34">
                <parameter key="url" value="http://google.fr/search?q=data+science"/>
                <list key="query_parameters"/>
                <list key="request_properties"/>
              </operator>
              <operator activated="true" class="text:html_to_xml" compatibility="7.5.000" expanded="true" height="68" name="HTML to XML" width="90" x="179" y="34"/>
              <connect from_op="Get Page" from_port="output" to_op="HTML to XML" to_port="document"/>
              <connect from_op="HTML to XML" from_port="document" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <!-- Second subprocess (the "catch" side): the same two steps with the https:// URL. -->
            <process expanded="true">
              <operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (2)" width="90" x="45" y="34">
                <parameter key="url" value="https://google.fr/search?q=data+science"/>
                <list key="query_parameters"/>
                <list key="request_properties"/>
              </operator>
              <operator activated="true" class="text:html_to_xml" compatibility="7.5.000" expanded="true" height="68" name="HTML to XML (2)" width="90" x="179" y="34"/>
              <connect from_op="Get Page (2)" from_port="output" to_op="HTML to XML (2)" to_port="document"/>
              <connect from_op="HTML to XML (2)" from_port="document" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Get Pages" from_port="Example Set" to_op="Data to Documents" to_port="example set"/>
          <connect from_op="Data to Documents" from_port="documents" to_op="Documents to Data" to_port="documents 1"/>
          <connect from_op="Handle Exception" from_port="out 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

    You first 'try' the http:// set of processes. If that fails (i.e. 'catch'), you do the same process again but with https://. If the base URL is always the same, i.e. google.fr, then using a macro you can extract a list of the various search terms (search?q=data+science, search?q=rapidminer, etc.) and loop through it. 

  • Options
    Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn

    @miner as I posted above, you could just create a list of abc.com and www.abc.com, then loop through them in a Handle Exception that has http:// on the Try side and https:// on the Catch side.  The Handle Exception operator is an awesome production type of operator. 

  • Options
    minerminer Member Posts: 13 Contributor II

    Hi @Thomas_Ott

     

    I tried your process with the Handle Exception - operator.

    Within that I run the Get Pages - operator with http:// and on the catch https://.

    But the result table is giving me Response-Code 200 and 301 for http://

    How can I train the operator to consider 301 as false?

    It seems, that Get Pages takes the response of the server 301 as a regular response and put it as attribute into the example set.

    What additional operator do I have to use for getting 200 a good result and 301 false?

    Thanks

    miner

  • Options
    landland RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 2,531 Unicorn

    Hi,

     

    well, first make sure you checked the parameter "follow redirects" in the Get Pages operator. Usually this should follow the redirects by moved temporarily or permanently...

    If that doesn't work, you simply could do it this way:

    1. Try all urls with http:// in front

    2. Split the result into examples which are correctly returned (Code 200) and the others. You can use the Filter Examples operator for that.

    3. Replace http:// by https:// and try again

    4. Split again as in step 2.

    5. Add www. in front of domain name. and try again

    ...

    In the end you append all correctly loaded pages using the Append operator. If the attributes of the returned data sets are different, Super Set operators or a single Super Set (Advanced) from the Jackhammer Extension will help you.

     

    Hope that helps.

     

    Greetings,

     Sebastian

  • Options
    minerminer Member Posts: 13 Contributor II

    Hi @land

     

    that was basically the first thing I tried but then I ran into the problem of 'network connection'

    network connection.jpg

    So it seems that I need to use the Handle Exception - operator.

    But I´m stuck using this as I don´t seem to capture completely the concept of Handle Exception.

    Basically I thought it should work like you suggested within the operator but I´m not able to get it done properly.

    This is my current try:

    <?xml version="1.0" encoding="UTF-8"?>
    <process version="7.5.003">
      <!-- Read Excel feeds Handle Exception, which wraps two Get Pages variants. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.5.003" expanded="true" name="Process">
        <process expanded="true">
          <!-- Imports cell range A1:A76 as attribute A. -->
          <operator activated="true" class="read_excel" compatibility="7.5.003" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
            <parameter key="excel_file" value="C:\Users\xxx\Desktop\Crawler\test_url.xls"/>
            <parameter key="imported_cell_range" value="A1:A76"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations"/>
            <parameter key="locale" value="German (Germany)"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="A.true.file_path.attribute"/>
            </list>
          </operator>
          <operator activated="true" class="handle_exception" compatibility="7.5.003" expanded="true" height="82" name="Handle Exception" width="90" x="179" y="34">
            <!-- NOTE(review): exception_macro presumably only names a macro that receives the
                 exception message; it does not look like a condition on the response code. Confirm
                 against the Handle Exception operator documentation. -->
            <parameter key="exception_macro" value="Response_Code301"/>
            <!-- First subprocess: Get Pages on attribute A, then Filter Examples. -->
            <process expanded="true">
              <operator activated="true" class="web:retrieve_webpages" compatibility="7.3.000" expanded="true" height="68" name="Get Pages" width="90" x="45" y="34">
                <parameter key="link_attribute" value="A"/>
                <parameter key="random_user_agent" value="true"/>
                <parameter key="accept_cookies" value="all"/>
              </operator>
              <!-- NOTE(review): the parameter_string value carries literal single quotes; verify
                   that the attribute value filter parses it as intended. -->
              <operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="34">
                <parameter key="parameter_string" value="'Response-Code=200'"/>
                <parameter key="condition_class" value="attribute_value_filter"/>
                <list key="filters_list"/>
              </operator>
              <connect from_port="in 1" to_op="Get Pages" to_port="Example Set"/>
              <connect from_op="Get Pages" from_port="Example Set" to_op="Filter Examples" to_port="example set input"/>
              <connect from_op="Filter Examples" from_port="example set output" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <!-- Second subprocess: Get Pages again on the same attribute A, with no filter. -->
            <process expanded="true">
              <operator activated="true" class="web:retrieve_webpages" compatibility="7.3.000" expanded="true" height="68" name="Get Pages (2)" width="90" x="112" y="34">
                <parameter key="link_attribute" value="A"/>
                <parameter key="random_user_agent" value="true"/>
                <parameter key="accept_cookies" value="all"/>
              </operator>
              <connect from_port="in 1" to_op="Get Pages (2)" to_port="Example Set"/>
              <connect from_op="Get Pages (2)" from_port="Example Set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Read Excel" from_port="output" to_op="Handle Exception" to_port="in 1"/>
          <connect from_op="Handle Exception" from_port="out 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

    But with this I get the warning of 'duplicate attribute'

    dublicate attribute.jpg

     

    Maybe someone can help me to take a deep dive into Handle Exception...

    Cheers

    miner

     

  • Options
    minerminer Member Posts: 13 Contributor II

    Hi @land

     

    this was the first way I tried to go through the process but then I ran into the problem of network connection

     

    network connection.jpg

    So it seems that I have to use Handle Exception - operator.

    But I´m stuck trying this as it seems that I don´t capture the concept of Handle Exception.

    I thought it should work like you suggested within the operator but that doesn´t work.

    This is my current try:

    <?xml version="1.0" encoding="UTF-8"?>
    <process version="7.5.003">
      <!-- Read Excel feeds Handle Exception, which wraps two Get Pages variants. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.5.003" expanded="true" name="Process">
        <process expanded="true">
          <!-- Imports cell range A1:A76 as attribute A. -->
          <operator activated="true" class="read_excel" compatibility="7.5.003" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
            <parameter key="excel_file" value="C:\Users\ralf\Desktop\Crawler\test_url.xls"/>
            <parameter key="imported_cell_range" value="A1:A76"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations"/>
            <parameter key="locale" value="German (Germany)"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="A.true.file_path.attribute"/>
            </list>
          </operator>
          <operator activated="true" class="handle_exception" compatibility="7.5.003" expanded="true" height="82" name="Handle Exception" width="90" x="179" y="34">
            <!-- NOTE(review): exception_macro presumably only names a macro that receives the
                 exception message; it does not look like a condition on the response code. Confirm
                 against the Handle Exception operator documentation. -->
            <parameter key="exception_macro" value="Response_Code301"/>
            <!-- First subprocess: Get Pages on attribute A, then Filter Examples. -->
            <process expanded="true">
              <operator activated="true" class="web:retrieve_webpages" compatibility="7.3.000" expanded="true" height="68" name="Get Pages" width="90" x="45" y="34">
                <parameter key="link_attribute" value="A"/>
                <parameter key="random_user_agent" value="true"/>
                <parameter key="accept_cookies" value="all"/>
              </operator>
              <!-- NOTE(review): the parameter_string value carries literal single quotes; verify
                   that the attribute value filter parses it as intended. -->
              <operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="34">
                <parameter key="parameter_string" value="'Response-Code=200'"/>
                <parameter key="condition_class" value="attribute_value_filter"/>
                <list key="filters_list"/>
              </operator>
              <connect from_port="in 1" to_op="Get Pages" to_port="Example Set"/>
              <connect from_op="Get Pages" from_port="Example Set" to_op="Filter Examples" to_port="example set input"/>
              <connect from_op="Filter Examples" from_port="example set output" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <!-- Second subprocess: Get Pages again on the same attribute A, with no filter. -->
            <process expanded="true">
              <operator activated="true" class="web:retrieve_webpages" compatibility="7.3.000" expanded="true" height="68" name="Get Pages (2)" width="90" x="112" y="34">
                <parameter key="link_attribute" value="A"/>
                <parameter key="random_user_agent" value="true"/>
                <parameter key="accept_cookies" value="all"/>
              </operator>
              <connect from_port="in 1" to_op="Get Pages (2)" to_port="Example Set"/>
              <connect from_op="Get Pages (2)" from_port="Example Set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Read Excel" from_port="output" to_op="Handle Exception" to_port="in 1"/>
          <connect from_op="Handle Exception" from_port="out 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

    But this brings the error message of 'dublicate attribute'

    dublicate attribute.jpg

    So maybe there is someone who can help me out and take a deep dive into Handle Exception....

    Cheers

    miner

     

Sign In or Register to comment.