Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.

wrapping GetPage in HandleException - cannot open connection

cindyharper Member Posts: 9 Contributor II
edited November 2018 in Help
I'm getting this error for every call of Get Page in a Handle Exception wrapper inside a Loop Examples loop.

May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:05 AM INFO: Saving results.

Here's my process:


<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process definition (process version 5.2.006) as posted in the question.
  Top-level wiring, per the <connect> elements near the end of this file:
  Retrieve, then Select Attributes, then Generate Attributes (2), then Filter Examples,
  then Loop Examples. The loop's "example set" output goes to Store (result 1); its
  "output 1" goes through Flatten Collection to Store (2) (result 2).
-->
<process version="5.2.006">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
    <parameter key="logverbosity" value="all"/>
    <!-- logfile and resultfile are absolute Windows paths under one user's profile directory. -->
    <parameter key="logfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3log"/>
    <parameter key="resultfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3result"/>
    <process expanded="true" height="415" width="487">
      <!-- Input data: repository entry "GooglePagesStep2". -->
      <operator activated="true" class="retrieve" compatibility="5.2.006" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
        <parameter key="repository_entry" value="GooglePagesStep2"/>
      </operator>
      <!-- Subset filter with invert_selection set to true.
           NOTE(review): presumably this removes the listed attributes and keeps the rest; confirm against the operator documentation. -->
      <operator activated="true" class="select_attributes" compatibility="5.2.006" expanded="true" height="76" name="Select Attributes" width="90" x="45" y="210">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value="URL"/>
        <parameter key="attributes" value="Content-Length|Content-Type|Date|Expires|Last-Modified|Response-Code|Response-Message|URL|token_number"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Defines an attribute named "PDF" from the expression contains(Href,".pdf"). -->
      <operator activated="true" class="generate_attributes" compatibility="5.2.006" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="112" y="165">
        <list key="function_descriptions">
          <parameter key="PDF" value="contains(Href,&quot;.pdf&quot;)"/>
        </list>
      </operator>
      <!-- No parameters are set on Filter Examples, so nothing in this file makes it filter on the PDF attribute generated above. -->
      <operator activated="true" class="filter_examples" compatibility="5.2.006" expanded="true" height="76" name="Filter Examples" width="90" x="179" y="75"/>
      <!-- Loop Examples contains a single Handle Exception operator that wraps the page fetch and text extraction. -->
      <operator activated="true" class="loop_examples" compatibility="5.2.006" expanded="true" height="94" name="Loop Examples" width="90" x="246" y="30">
        <process expanded="true" height="357" width="480">
          <operator activated="true" class="handle_exception" compatibility="5.2.006" expanded="true" height="94" name="Handle Exception" width="90" x="45" y="30">
            <!-- NOTE(review): "exceptionmsg" is presumably the macro that receives the caught exception's message; confirm. -->
            <parameter key="exception_macro" value="exceptionmsg"/>
            <!-- First subprocess: Get Page, two Extract Token Number steps, then Documents to Data. -->
            <process expanded="true" height="375" width="225">
              <!-- The url parameter below is the literal text "Href"; no macro reference is used.
                   NOTE(review): if the intent is to fetch the address stored in each example's Href attribute,
                   verify how that value is meant to reach this parameter.
                   Both timeouts are 100000; the unit is not stated in this file. -->
              <operator activated="true" class="web:get_webpage" compatibility="5.1.004" expanded="true" height="60" name="Get Page" width="90" x="45" y="30">
                <parameter key="url" value="Href"/>
                <parameter key="random_user_agent" value="true"/>
                <parameter key="connection_timeout" value="100000"/>
                <parameter key="read_timeout" value="100000"/>
                <parameter key="accept_cookies" value="all"/>
                <list key="query_parameters"/>
              </operator>
              <!-- Token count using condition "matches" with regular expression LIBRAR; no metadata_key is set here. -->
              <operator activated="true" class="text:extract_token_number" compatibility="5.2.001" expanded="true" height="60" name="Extract Token Number" width="90" x="45" y="120">
                <parameter key="condition" value="matches"/>
                <parameter key="regular_expression" value="LIBRAR"/>
              </operator>
              <!-- Same condition and expression as the previous operator, but with invert_condition true and metadata_key "LIBRARnumber". -->
              <operator activated="true" class="text:extract_token_number" compatibility="5.2.001" expanded="true" height="60" name="Extract Token Number (2)" width="90" x="45" y="210">
                <parameter key="metadata_key" value="LIBRARnumber"/>
                <parameter key="condition" value="matches"/>
                <parameter key="regular_expression" value="LIBRAR"/>
                <parameter key="invert_condition" value="true"/>
              </operator>
              <!-- Converts the document to an example set; text_attribute is "NewsletterDoc" and label_attribute is "Href". -->
              <operator activated="true" class="text:documents_to_data" compatibility="5.2.001" expanded="true" height="76" name="Documents to Data (2)" width="90" x="112" y="300">
                <parameter key="text_attribute" value="NewsletterDoc"/>
                <parameter key="label_attribute" value="Href"/>
              </operator>
              <!-- "in 1" is wired straight to "out 1"; the Get Page chain is not connected to it and feeds "out 2". -->
              <connect from_port="in 1" to_port="out 1"/>
              <connect from_op="Get Page" from_port="output" to_op="Extract Token Number" to_port="document"/>
              <connect from_op="Extract Token Number" from_port="document" to_op="Extract Token Number (2)" to_port="document"/>
              <connect from_op="Extract Token Number (2)" from_port="document" to_op="Documents to Data (2)" to_port="documents 1"/>
              <connect from_op="Documents to Data (2)" from_port="example set" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
            </process>
            <!-- Second subprocess: contains no operators, only pass-through connections (in 1 to out 1, in 2 to out 2).
                 NOTE(review): presumably the branch executed when the first subprocess fails; confirm. -->
            <process expanded="true" height="375" width="202">
              <connect from_port="in 1" to_port="out 1"/>
              <connect from_port="in 2" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
            </process>
          </operator>
          <!-- Handle Exception "out 1" returns to the loop's example set port; "out 2" becomes the loop's "output 1". -->
          <connect from_port="example set" to_op="Handle Exception" to_port="in 1"/>
          <connect from_op="Handle Exception" from_port="out 1" to_port="example set"/>
          <connect from_op="Handle Exception" from_port="out 2" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="flatten_collection" compatibility="5.2.006" expanded="true" height="60" name="Flatten Collection" width="90" x="246" y="165"/>
      <!-- Store (2) receives the flattened loop output; Store receives the loop's example set. -->
      <operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store (2)" width="90" x="313" y="210">
        <parameter key="repository_entry" value="GooglePagesStep3Docs"/>
      </operator>
      <operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store" width="90" x="380" y="120">
        <parameter key="repository_entry" value="GooglePagesStep3Store"/>
      </operator>
      <connect from_op="Retrieve" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_op="Store" to_port="input"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Flatten Collection" to_port="collection"/>
      <connect from_op="Flatten Collection" from_port="flat" to_op="Store (2)" to_port="input"/>
      <connect from_op="Store (2)" from_port="through" to_port="result 2"/>
      <connect from_op="Store" from_port="through" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

Any suggestions? How do I delay between GETs for the Loop Examples loop? Will that help?

Answers

  • MariusHelf RapidMiner Certified Expert, Member Posts: 1,869 Unicorn
    For the delay you could try the Delay operator :)

    Does the process run without Handle Exception? Your thread title indicates that you have no/fewer problems without that operator.
    Oh no, I just saw that you entered "Href" as the URL in Get Page - of course that does not work, as it is not a valid URL.
    I suppose you have a Href attribute in your example set. Then you need something like the process below. Please note the Extract Macro operator in Loop Examples, and the use of the href macro in Get Page.

    Best,
    Marius
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner process definition (process version 5.2.006) posted as the suggested fix.
      Inside Loop Examples, an Extract Macro operator sets the macro "href" from the Href
      attribute, and Get Page uses %{href} as its url parameter.
      Top-level wiring, per the <connect> elements near the end of this file:
      Retrieve, then Select Attributes, then Generate Attributes (2), then Filter Examples,
      then Loop Examples. The loop's "example set" output goes to Store (result 1); its
      "output 1" goes through Flatten Collection to Store (2) (result 2).
    -->
    <process version="5.2.006">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
        <parameter key="logverbosity" value="all"/>
        <!-- logfile and resultfile are absolute Windows paths under one user's profile directory. -->
        <parameter key="logfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3log"/>
        <parameter key="resultfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3result"/>
        <process expanded="true" height="480" width="705">
          <!-- Input data: repository entry "GooglePagesStep2". -->
          <operator activated="true" class="retrieve" compatibility="5.2.006" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
            <parameter key="repository_entry" value="GooglePagesStep2"/>
          </operator>
          <!-- Subset filter with invert_selection set to true.
               NOTE(review): presumably this removes the listed attributes and keeps the rest; confirm against the operator documentation. -->
          <operator activated="true" class="select_attributes" compatibility="5.2.006" expanded="true" height="76" name="Select Attributes" width="90" x="180" y="30">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value="URL"/>
            <parameter key="attributes" value="Content-Length|Content-Type|Date|Expires|Last-Modified|Response-Code|Response-Message|URL|token_number"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Defines an attribute named "PDF" from the expression contains(Href,".pdf"). -->
          <operator activated="true" class="generate_attributes" compatibility="5.2.006" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="315" y="30">
            <list key="function_descriptions">
              <parameter key="PDF" value="contains(Href,&quot;.pdf&quot;)"/>
            </list>
          </operator>
          <!-- No parameters are set on Filter Examples, so nothing in this file makes it filter on the PDF attribute generated above. -->
          <operator activated="true" class="filter_examples" compatibility="5.2.006" expanded="true" height="76" name="Filter Examples" width="90" x="450" y="30"/>
          <!-- Loop Examples runs Extract Macro first, then Handle Exception (see the inner <connect> elements). -->
          <operator activated="true" class="loop_examples" compatibility="5.2.006" expanded="true" height="94" name="Loop Examples" width="90" x="112" y="210">
            <process expanded="true" height="357" width="480">
              <!-- Sets macro "href" from a data value of attribute "Href" at example_index %{example}.
                   NOTE(review): %{example} is presumably the iteration macro supplied by Loop Examples; it is not defined in this file, so confirm. -->
              <operator activated="true" class="extract_macro" compatibility="5.2.006" expanded="true" height="60" name="Extract Macro" width="90" x="45" y="30">
                <parameter key="macro" value="href"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="attribute_name" value="Href"/>
                <parameter key="example_index" value="%{example}"/>
              </operator>
              <operator activated="true" class="handle_exception" compatibility="5.2.006" expanded="true" height="94" name="Handle Exception" width="90" x="246" y="30">
                <!-- NOTE(review): "exceptionmsg" is presumably the macro that receives the caught exception's message; confirm. -->
                <parameter key="exception_macro" value="exceptionmsg"/>
                <!-- First subprocess: Get Page, two Extract Token Number steps, then Documents to Data. -->
                <process expanded="true" height="386" width="279">
                  <!-- The url parameter references the "href" macro set by Extract Macro above.
                       Both timeouts are 100000; the unit is not stated in this file. -->
                  <operator activated="true" class="web:get_webpage" compatibility="5.2.000" expanded="true" height="60" name="Get Page" width="90" x="45" y="30">
                    <parameter key="url" value="%{href}"/>
                    <parameter key="random_user_agent" value="true"/>
                    <parameter key="connection_timeout" value="100000"/>
                    <parameter key="read_timeout" value="100000"/>
                    <parameter key="accept_cookies" value="all"/>
                    <list key="query_parameters"/>
                    <list key="request_properties"/>
                  </operator>
                  <!-- Token count using condition "matches" with regular expression LIBRAR; no metadata_key is set here. -->
                  <operator activated="true" class="text:extract_token_number" compatibility="5.2.002" expanded="true" height="60" name="Extract Token Number" width="90" x="45" y="120">
                    <parameter key="condition" value="matches"/>
                    <parameter key="regular_expression" value="LIBRAR"/>
                  </operator>
                  <!-- Same condition and expression as the previous operator, but with invert_condition true and metadata_key "LIBRARnumber". -->
                  <operator activated="true" class="text:extract_token_number" compatibility="5.2.002" expanded="true" height="60" name="Extract Token Number (2)" width="90" x="45" y="210">
                    <parameter key="metadata_key" value="LIBRARnumber"/>
                    <parameter key="condition" value="matches"/>
                    <parameter key="regular_expression" value="LIBRAR"/>
                    <parameter key="invert_condition" value="true"/>
                  </operator>
                  <!-- Converts the document to an example set; text_attribute is "NewsletterDoc" and label_attribute is "Href". -->
                  <operator activated="true" class="text:documents_to_data" compatibility="5.2.002" expanded="true" height="76" name="Documents to Data (2)" width="90" x="179" y="210">
                    <parameter key="text_attribute" value="NewsletterDoc"/>
                    <parameter key="label_attribute" value="Href"/>
                  </operator>
                  <!-- "in 1" is wired straight to "out 1"; the Get Page chain is not connected to it and feeds "out 2". -->
                  <connect from_port="in 1" to_port="out 1"/>
                  <connect from_op="Get Page" from_port="output" to_op="Extract Token Number" to_port="document"/>
                  <connect from_op="Extract Token Number" from_port="document" to_op="Extract Token Number (2)" to_port="document"/>
                  <connect from_op="Extract Token Number (2)" from_port="document" to_op="Documents to Data (2)" to_port="documents 1"/>
                  <connect from_op="Documents to Data (2)" from_port="example set" to_port="out 2"/>
                  <portSpacing port="source_in 1" spacing="0"/>
                  <portSpacing port="source_in 2" spacing="0"/>
                  <portSpacing port="source_in 3" spacing="0"/>
                  <portSpacing port="sink_out 1" spacing="0"/>
                  <portSpacing port="sink_out 2" spacing="0"/>
                  <portSpacing port="sink_out 3" spacing="0"/>
                </process>
                <!-- Second subprocess: contains no operators, only pass-through connections (in 1 to out 1, in 2 to out 2).
                     NOTE(review): presumably the branch executed when the first subprocess fails; confirm. -->
                <process expanded="true" height="375" width="202">
                  <connect from_port="in 1" to_port="out 1"/>
                  <connect from_port="in 2" to_port="out 2"/>
                  <portSpacing port="source_in 1" spacing="0"/>
                  <portSpacing port="source_in 2" spacing="0"/>
                  <portSpacing port="source_in 3" spacing="0"/>
                  <portSpacing port="sink_out 1" spacing="0"/>
                  <portSpacing port="sink_out 2" spacing="0"/>
                  <portSpacing port="sink_out 3" spacing="0"/>
                </process>
              </operator>
              <!-- The example set passes through Extract Macro into Handle Exception "in 1";
                   "out 1" returns to the loop's example set port and "out 2" becomes the loop's "output 1". -->
              <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Extract Macro" from_port="example set" to_op="Handle Exception" to_port="in 1"/>
              <connect from_op="Handle Exception" from_port="out 1" to_port="example set"/>
              <connect from_op="Handle Exception" from_port="out 2" to_port="output 1"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="flatten_collection" compatibility="5.2.006" expanded="true" height="60" name="Flatten Collection" width="90" x="246" y="255"/>
          <!-- Store (2) receives the flattened loop output; Store receives the loop's example set. -->
          <operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store (2)" width="90" x="380" y="255">
            <parameter key="repository_entry" value="GooglePagesStep3Docs"/>
          </operator>
          <operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store" width="90" x="246" y="165">
            <parameter key="repository_entry" value="GooglePagesStep3Store"/>
          </operator>
          <connect from_op="Retrieve" from_port="output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
          <connect from_op="Loop Examples" from_port="example set" to_op="Store" to_port="input"/>
          <connect from_op="Loop Examples" from_port="output 1" to_op="Flatten Collection" to_port="collection"/>
          <connect from_op="Flatten Collection" from_port="flat" to_op="Store (2)" to_port="input"/>
          <connect from_op="Store (2)" from_port="through" to_port="result 2"/>
          <connect from_op="Store" from_port="through" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
Sign In or Register to comment.