🎉 🎉 RAPIDMINER 9.10 IS OUT!!! 🎉🎉

Download the latest version helping analytics teams accelerate time-to-value for streaming and IIOT use cases.

CLICK HERE TO DOWNLOAD

Help using "Search Twitter" operator with "since id" parameter

gmpgmp Member Posts: 3 Contributor I
Hey everyone, 

I am having some trouble with the "Search Twitter" operator and the "since id" parameter. I was hoping to use the same query multiple times while only requesting newer tweets that had not been downloaded yet. 

What I am trying to do is
 - get tweets for a certain query
 - then store those in my repository 
 - search again (later) with the same query, only this time getting newer tweets (IDs greater than the last ID of the first search), to avoid hitting the rate limit   
 - finally combine those example sets

Not sure if I am missing something here...

Thanks for your help!


Here is a sample process:
<?xml version="1.0" encoding="UTF-8"?><process version="9.6.000">
  <!--
    Sample process: incremental Twitter search.
    1. "First Search" queries Twitter for "apples" (limit 500).
    2. "Get last ID" stores the maximum of attribute "Id" in the macro max_id.
    3. "First example set" stores the result at ../data/apples.
    4. "Second Search" repeats the query with since_id set to the macro max_id (limit 1000).
    5. "Retrieve" reloads ../data/apples and "Combined example set" appends it to the new results.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Initial search: query "apples", result type "recent", at most 500 results. -->
      <operator activated="true" class="social_media:search_twitter" compatibility="9.6.000" expanded="true" height="82" name="First Search" width="90" x="45" y="136">
        <parameter key="connection_source" value="repository"/>
        <parameter key="connection_entry" value="//Local Repository/Connections/Twitter"/>
        <parameter key="query" value="apples"/>
        <parameter key="result_type" value="recent"/>
        <parameter key="limit" value="500"/>
        <parameter key="locale" value="en"/>
        <parameter key="filter_by_geo_location" value="false"/>
        <parameter key="radius_unit" value="miles"/>
      </operator>
      <!--
        Extracts the "max" statistic of attribute "Id" into the macro max_id.
        NOTE(review): tweet IDs are presumably very large integers; verify that the macro
        holds the exact, plainly formatted ID (no rounding, no decimal places, no
        scientific notation) before it is used as since_id. TODO confirm by logging the macro.
      -->
      <operator activated="true" class="extract_macro" compatibility="9.6.000" expanded="true" height="68" name="Get last ID" width="90" x="179" y="136">
        <parameter key="macro" value="max_id"/>
        <parameter key="macro_type" value="statistics"/>
        <parameter key="statistics" value="max"/>
        <parameter key="attribute_name" value="Id"/>
        <list key="additional_macros"/>
      </operator>
      <!-- Stores the first search result at ../data/apples. -->
      <operator activated="true" class="store" compatibility="9.6.000" expanded="true" height="68" name="First example set" width="90" x="313" y="136">
        <parameter key="repository_entry" value="../data/apples"/>
      </operator>
      <!-- Reads back the entry written by "First example set" (same repository path). -->
      <operator activated="true" class="retrieve" compatibility="9.6.000" expanded="true" height="68" name="Retrieve" width="90" x="45" y="493">
        <parameter key="repository_entry" value="../data/apples"/>
      </operator>
      <!--
        Follow-up search: same query, limit 1000, since_id taken from the macro max_id.
        NOTE(review): this operator has no incoming connection from "Get last ID" or
        "First example set", so the macro being set before this operator runs presumably
        depends on operator execution order. TODO confirm the order in the process.
      -->
      <operator activated="true" class="social_media:search_twitter" compatibility="9.6.000" expanded="true" height="82" name="Second Search" width="90" x="45" y="340">
        <parameter key="connection_source" value="repository"/>
        <parameter key="connection_entry" value="//Local Repository/Connections/Twitter"/>
        <parameter key="query" value="apples"/>
        <parameter key="result_type" value="recent"/>
        <parameter key="limit" value="1000"/>
        <parameter key="since_id" value="%{max_id}"/>
        <parameter key="locale" value="en"/>
        <parameter key="filter_by_geo_location" value="false"/>
        <parameter key="radius_unit" value="miles"/>
      </operator>
      <!-- Duplicates the second search result: one copy goes to result 2, one to the append. -->
      <operator activated="true" class="multiply" compatibility="9.6.000" expanded="true" height="103" name="Second example set" width="90" x="179" y="340"/>
      <!-- Appends the new search results (example set 1) and the stored tweets (example set 2). -->
      <operator activated="true" class="append" compatibility="9.6.000" expanded="true" height="103" name="Combined example set" width="90" x="313" y="493">
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="merge_type" value="all"/>
      </operator>
      <!-- Wiring: result 1 = first search, result 2 = second search, result 3 = combined set. -->
      <connect from_op="First Search" from_port="output" to_op="Get last ID" to_port="example set"/>
      <connect from_op="Get last ID" from_port="example set" to_op="First example set" to_port="input"/>
      <connect from_op="First example set" from_port="through" to_port="result 1"/>
      <connect from_op="Retrieve" from_port="output" to_op="Combined example set" to_port="example set 2"/>
      <connect from_op="Second Search" from_port="output" to_op="Second example set" to_port="input"/>
      <connect from_op="Second example set" from_port="output 1" to_port="result 2"/>
      <connect from_op="Second example set" from_port="output 2" to_op="Combined example set" to_port="example set 1"/>
      <connect from_op="Combined example set" from_port="merged set" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <!-- Canvas annotations; their text is escaped HTML and is left exactly as generated. -->
      <description align="left" color="green" colored="true" height="196" resized="true" width="398" x="32" y="44">- Search for a string like &amp;quot;apples&amp;quot;&lt;br&gt;- Limit results to 500&lt;br&gt;- Store the example set to the local repository&lt;br&gt;</description>
      <description align="left" color="red" colored="true" height="174" resized="true" width="400" x="33" y="267">- Search again with same query string, e.g. &amp;quot;apples&amp;quot;&lt;br&gt;- Use the last ID from previous search&lt;br/&gt; - Limit results to 1000</description>
      <description align="left" color="red" colored="true" height="167" resized="true" width="402" x="30" y="450">- Load (old) tweets from the repository&lt;br/&gt; - Combine with new search results</description>
      <description align="left" color="yellow" colored="false" height="126" resized="false" width="180" x="904" y="138">- The first search returns as expected: 500 tweets&lt;br/&gt; - The second search has more results than expected, incl. results BEFORE the last ID</description>
    </process>
  </operator>
</process>


Answers

  • gmpgmp Member Posts: 3 Contributor I
    Hey guys, 
    not sure if I tagged this question correctly... so far I haven't received any feedback. 
    Any idea what I am missing?

    Thanks!
Sign In or Register to comment.