JSON to data and de-pivot for exampleset

alex_fawzialex_fawzi Member Posts: 6 Contributor I
edited December 2018 in Help

I'm having a 'mare trying to get some data to de-pivot from json... 

 

The below process gets data from Poloniex in json format. The first row works fine and the exampleset is generated as I'd like, the other 2 (the orderbook and historical trades) I just can't get my head around. The Poloniex public api info is here https://poloniex.com/support/api/ but seems to be outdated. Any help would be appreciated.

 

I came across another post whilst trying to figure out how to use the json file which gave me the de-pivot, I also found one using the json -> xml & xpath way but it seems like json -> data and de-pivot should work.

<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page" width="90" x="45" y="34">
<parameter key="url" value="https://poloniex.com/public?command=returnChartData&amp;currencyPair=BTC_XMR&amp;end=9999999999&amp;period=7200&amp;start=1505057287"/>
<parameter key="random_user_agent" value="false"/>
<parameter key="connection_timeout" value="10000"/>
<parameter key="read_timeout" value="10000"/>
<parameter key="follow_redirects" value="true"/>
<parameter key="accept_cookies" value="none"/>
<parameter key="cookie_scope" value="global"/>
<parameter key="request_method" value="GET"/>
<list key="query_parameters"/>
<list key="request_properties"/>
<parameter key="override_encoding" value="false"/>
<parameter key="encoding" value="SYSTEM"/>
<description align="center" color="orange" colored="true" width="126">Get historical price data</description>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data" width="90" x="179" y="34">
<parameter key="ignore_arrays" value="false"/>
<parameter key="limit_attributes" value="false"/>
<parameter key="skip_invalid_documents" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (2)" width="90" x="45" y="187">
<parameter key="url" value="https://poloniex.com/public?command=returnTradeHistory&amp;currencyPair=BTC_XMR"/>
<parameter key="random_user_agent" value="false"/>
<parameter key="connection_timeout" value="10000"/>
<parameter key="read_timeout" value="10000"/>
<parameter key="follow_redirects" value="true"/>
<parameter key="accept_cookies" value="none"/>
<parameter key="cookie_scope" value="global"/>
<parameter key="request_method" value="GET"/>
<list key="query_parameters"/>
<list key="request_properties"/>
<parameter key="override_encoding" value="false"/>
<parameter key="encoding" value="SYSTEM"/>
<description align="center" color="orange" colored="true" width="126">Get historical trade data (last month / 50,000 trades (API limit))</description>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data (2)" width="90" x="179" y="187">
<parameter key="ignore_arrays" value="false"/>
<parameter key="limit_attributes" value="false"/>
<parameter key="skip_invalid_documents" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot (2)" width="90" x="313" y="187">
<list key="attribute_name">
<parameter key="amount" value="amount"/>
<parameter key="date" value="date"/>
<parameter key="globalTradeID" value="globalTradeID"/>
<parameter key="rate" value="rate"/>
<parameter key="total" value="total"/>
<parameter key="tradeID" value="tradeID"/>
</list>
<parameter key="index_attribute" value="id"/>
<parameter key="create_nominal_index" value="false"/>
<parameter key="keep_missings" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (3)" width="90" x="45" y="340">
<parameter key="url" value="https://poloniex.com/public?command=returnOrderBook&amp;currencyPair=BTC_XMR&amp;depth=10&amp;start=1505057287end=9999999999"/>
<parameter key="random_user_agent" value="false"/>
<parameter key="connection_timeout" value="10000"/>
<parameter key="read_timeout" value="10000"/>
<parameter key="follow_redirects" value="true"/>
<parameter key="accept_cookies" value="none"/>
<parameter key="cookie_scope" value="global"/>
<parameter key="request_method" value="GET"/>
<list key="query_parameters"/>
<list key="request_properties"/>
<parameter key="override_encoding" value="false"/>
<parameter key="encoding" value="SYSTEM"/>
<description align="center" color="orange" colored="true" width="126">Get orderbook data</description>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data (3)" width="90" x="179" y="340">
<parameter key="ignore_arrays" value="false"/>
<parameter key="limit_attributes" value="false"/>
<parameter key="skip_invalid_documents" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot" width="90" x="313" y="34">
<list key="attribute_name">
<parameter key="close" value="\[\d+\]\.close"/>
<parameter key="high" value="\[\d+\]\.high"/>
<parameter key="low" value="\[\d+\]\.low"/>
<parameter key="open" value="\[\d+\]\.open"/>
<parameter key="date" value="\[\d+\]\.date"/>
<parameter key="volume" value="\[\d+\]\.volume"/>
<parameter key="quoteVolume" value="\[\d+\]\.quoteVolume"/>
<parameter key="weightedAverage" value="\[\d+\]\.weightedAverage"/>
</list>
<parameter key="index_attribute" value="id"/>
<parameter key="create_nominal_index" value="false"/>
<parameter key="keep_missings" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot (3)" width="90" x="313" y="340">
<list key="attribute_name">
<parameter key="asks" value="asks"/>
<parameter key="bids" value="bids"/>
</list>
<parameter key="index_attribute" value="id"/>
<parameter key="create_nominal_index" value="false"/>
<parameter key="keep_missings" value="false"/>
</operator>
</process>

Thanks,

Alex

Best Answer

  • sgenzersgenzer 12Administrator, Moderator, Employee, RapidMiner Certified Analyst, Community Manager, Member, University Professor, PM Moderator Posts: 2,959 Community Manager
    Solution Accepted

    hello @alex_fawzi - ok that process came through fine.  The good news is that you're not really missing anything; the bad news is that second JSON array is just not that easy to work with in RapidMiner.  You can do what was suggested in that thread (JSON to XML, Write Document, Read XML) if you're just looking for one or two things.  If you really want to "unwrap" the array, you need to do some work.  Here is a quick-and-dirty for you that will give you the general idea (I probably don't have all the fields correct...):

     

    <?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (3)" width="90" x="45" y="34">
    <parameter key="url" value="https://poloniex.com/public?command=returnOrderBook&amp;currencyPair=BTC_XMR&amp;depth=10&amp;start=1506279150&amp;end=9999999999&amp;period=7200"/>
    <list key="query_parameters"/>
    <list key="request_properties"/>
    </operator>
    <operator activated="true" class="text:json_to_data" compatibility="7.5.000" expanded="true" height="82" name="JSON To Data (3)" width="90" x="179" y="34"/>
    <operator activated="true" class="transpose" compatibility="7.6.001" expanded="true" height="82" name="Transpose" width="90" x="313" y="34"/>
    <operator activated="true" class="filter_examples" compatibility="7.6.001" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="34">
    <list key="filters_list">
    <parameter key="filters_entry_key" value="id.contains.["/>
    </list>
    </operator>
    <operator activated="true" class="replace" compatibility="7.6.001" expanded="true" height="82" name="Replace" width="90" x="581" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="id"/>
    <parameter key="include_special_attributes" value="true"/>
    <parameter key="replace_what" value="\]"/>
    </operator>
    <operator activated="true" class="split" compatibility="7.6.001" expanded="true" height="82" name="Split" width="90" x="715" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="id"/>
    <parameter key="include_special_attributes" value="true"/>
    <parameter key="split_pattern" value="\["/>
    </operator>
    <operator activated="true" class="pivot" compatibility="7.6.001" expanded="true" height="82" name="Pivot" width="90" x="849" y="34">
    <parameter key="group_attribute" value="id_2"/>
    <parameter key="index_attribute" value="id_1"/>
    <parameter key="consider_weights" value="false"/>
    </operator>
    <connect from_op="Get Page (3)" from_port="output" to_op="JSON To Data (3)" to_port="documents 1"/>
    <connect from_op="JSON To Data (3)" from_port="example set" to_op="Transpose" to_port="example set input"/>
    <connect from_op="Transpose" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
    <connect from_op="Filter Examples" from_port="example set output" to_op="Replace" to_port="example set input"/>
    <connect from_op="Replace" from_port="example set output" to_op="Split" to_port="example set input"/>
    <connect from_op="Split" from_port="example set output" to_op="Pivot" to_port="example set input"/>
    <connect from_op="Pivot" from_port="example set output" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    </process>
    </operator>
    </process>

    Funny enough I have been working on a small project that will require a tool that will do exactly this much easier.  Stay tuned.  :)

     

    Scott

     

Answers

  • sgenzersgenzer 12Administrator, Moderator, Employee, RapidMiner Certified Analyst, Community Manager, Member, University Professor, PM Moderator Posts: 2,959 Community Manager

    hello @alex_fawzi - happy to help but can you please paste only one process XML in the thread?  It looks like you have two or three.


    Scott

     

  • alex_fawzialex_fawzi Member Posts: 6 Contributor I

    Hi Scott, sorry about that! Let me try again. 

     

    I'm just copying and pasting directly from studio so I'll try process by process. 

     

    This one works

     

    <?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
    <operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page" width="90" x="45" y="34">
    <parameter key="url" value="https://poloniex.com/public?command=returnChartData&amp;currencyPair=BTC_XMR&amp;end=9999999999&amp;period=7200&amp;start=1505057287"/>
    <parameter key="random_user_agent" value="false"/>
    <parameter key="connection_timeout" value="10000"/>
    <parameter key="read_timeout" value="10000"/>
    <parameter key="follow_redirects" value="true"/>
    <parameter key="accept_cookies" value="none"/>
    <parameter key="cookie_scope" value="global"/>
    <parameter key="request_method" value="GET"/>
    <list key="query_parameters"/>
    <list key="request_properties"/>
    <parameter key="override_encoding" value="false"/>
    <parameter key="encoding" value="SYSTEM"/>
    <description align="center" color="orange" colored="true" width="126">Get historical price data</description>
    </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
    <operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data" width="90" x="179" y="34">
    <parameter key="ignore_arrays" value="false"/>
    <parameter key="limit_attributes" value="false"/>
    <parameter key="skip_invalid_documents" value="false"/>
    </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
    <operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot" width="90" x="313" y="34">
    <list key="attribute_name">
    <parameter key="close" value="\[\d+\]\.close"/>
    <parameter key="high" value="\[\d+\]\.high"/>
    <parameter key="low" value="\[\d+\]\.low"/>
    <parameter key="open" value="\[\d+\]\.open"/>
    <parameter key="date" value="\[\d+\]\.date"/>
    <parameter key="volume" value="\[\d+\]\.volume"/>
    <parameter key="quoteVolume" value="\[\d+\]\.quoteVolume"/>
    <parameter key="weightedAverage" value="\[\d+\]\.weightedAverage"/>
    </list>
    <parameter key="index_attribute" value="id"/>
    <parameter key="create_nominal_index" value="false"/>
    <parameter key="keep_missings" value="false"/>
    </operator>
    </process>

    This one doesn't (I'll leave out the third as I think whatever it is that's stopping me from getting it right is the same for both)

     

     

    <?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
    <operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (2)" width="90" x="45" y="187">
    <parameter key="url" value="https://poloniex.com/public?command=returnTradeHistory&amp;currencyPair=BTC_XMR"/>
    <parameter key="random_user_agent" value="false"/>
    <parameter key="connection_timeout" value="10000"/>
    <parameter key="read_timeout" value="10000"/>
    <parameter key="follow_redirects" value="true"/>
    <parameter key="accept_cookies" value="none"/>
    <parameter key="cookie_scope" value="global"/>
    <parameter key="request_method" value="GET"/>
    <list key="query_parameters"/>
    <list key="request_properties"/>
    <parameter key="override_encoding" value="false"/>
    <parameter key="encoding" value="SYSTEM"/>
    <description align="center" color="orange" colored="true" width="126">Get historical trade data (last month / 50,000 trades (API limit))</description>
    </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
    <operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data (2)" width="90" x="179" y="187">
    <parameter key="ignore_arrays" value="false"/>
    <parameter key="limit_attributes" value="false"/>
    <parameter key="skip_invalid_documents" value="false"/>
    </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
    <operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot (2)" width="90" x="313" y="187">
    <list key="attribute_name">
    <parameter key="amount" value=".amount"/>
    <parameter key="date" value=".date"/>
    <parameter key="globalTradeID" value="\[\d+\]\.globalTradeID"/>
    <parameter key="rate" value="\[\d+\]\.rate"/>
    <parameter key="total" value="\[\d+\]\.total"/>
    <parameter key="tradeID" value="\[\d+\]\.tradeID"/>
    </list>
    <parameter key="index_attribute" value="id"/>
    <parameter key="create_nominal_index" value="false"/>
    <parameter key="keep_missings" value="false"/>
    </operator>
    </process>

    Thanks

     

  • sgenzersgenzer 12Administrator, Moderator, Employee, RapidMiner Certified Analyst, Community Manager, Member, University Professor, PM Moderator Posts: 2,959 Community Manager

    hello @alex_fawzi - so I think you're still pasting in more than one process.  Just take everything in the XML pane and paste here.  One process per insert.  Otherwise you get this mess:

    Screen Shot 2017-10-02 at 2.35.06 PM.png

     

    Scott

     

  • alex_fawzialex_fawzi Member Posts: 6 Contributor I

    Third time lucky... sorry about this. Removed one of the lines as it was getting stuck so in the proess the first line seems to work but the second doesn't transform as I would expect and I don't know why.

     

    This is where I found the info that led to me to where I am now - http://community.rapidminer.com/t5/RapidMiner-Studio-Forum/Rapidminer-and-JSON/m-p/33574

     

    ?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page" width="90" x="112" y="34">
    <parameter key="url" value="https://poloniex.com/public?command=returnChartData&amp;currencyPair=BTC_XMR&amp;start=1506279150&amp;end=9999999999&amp;period=7200"/>
    <list key="query_parameters"/>
    <list key="request_properties"/>
    </operator>
    <operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data" width="90" x="246" y="34"/>
    <operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot" width="90" x="380" y="34">
    <list key="attribute_name">
    <parameter key="close" value="\[\d+\]\.close"/>
    <parameter key="high" value="\[\d+\]\.high"/>
    <parameter key="low" value="\[\d+\]\.low"/>
    <parameter key="open" value="\[\d+\]\.open"/>
    <parameter key="date" value="\[\d+\]\.date"/>
    <parameter key="volume" value="\[\d+\]\.volume"/>
    <parameter key="quoteVolume" value="\[\d+\]\.quoteVolume"/>
    <parameter key="weightedAverage" value="\[\d+\]\.weightedAverage"/>
    </list>
    <parameter key="index_attribute" value="id"/>
    </operator>
    <operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (3)" width="90" x="45" y="340">
    <parameter key="url" value="https://poloniex.com/public?command=returnOrderBook&amp;currencyPair=BTC_XMR&amp;depth=10&amp;start=1506279150&amp;end=9999999999&amp;period=7200"/>
    <list key="query_parameters"/>
    <list key="request_properties"/>
    </operator>
    <operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data (3)" width="90" x="179" y="340"/>
    <operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot (3)" width="90" x="313" y="340">
    <list key="attribute_name">
    <parameter key="asks" value="\[\d+\]\.asks"/>
    <parameter key="bids" value="\[\d+\]\.bids"/>
    <parameter key="isFrozen" value="\[\d+\]\.isFrozen"/>
    <parameter key="seq" value="\[\d+\]\.seq"/>
    </list>
    <parameter key="index_attribute" value="id"/>
    </operator>
    <connect from_op="Get Page" from_port="output" to_op="JSON To Data" to_port="documents 1"/>
    <connect from_op="JSON To Data" from_port="example set" to_op="De-Pivot" to_port="example set input"/>
    <connect from_op="De-Pivot" from_port="example set output" to_port="result 1"/>
    <connect from_op="Get Page (3)" from_port="output" to_op="JSON To Data (3)" to_port="documents 1"/>
    <connect from_op="JSON To Data (3)" from_port="example set" to_op="De-Pivot (3)" to_port="example set input"/>
    <connect from_op="De-Pivot (3)" from_port="example set output" to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    </process>
    </operator>
    </process>

     

    Thanks again

  • alex_fawzialex_fawzi Member Posts: 6 Contributor I

    Amazing, thanks for that. I'll have a tinker about but it's looking good! 

    sgenzer
  • websiteguywebsiteguy Member Posts: 24 Contributor II

    Hi @sgenzer

     

    can you help me out with the timestampI cant work out how to convert from string of numbers to date. I just keep ending up with 1970 ?

     

    https://poloniex.com/public?command=returnChartData&currencyPair=USDT_XRP&end=9999999999&period=14400&start=1405699200

    to get it from a string to a date time format, I spent hours trying to suss it out?

     

    thanks in advance, lee

  • sgenzersgenzer 12Administrator, Moderator, Employee, RapidMiner Certified Analyst, Community Manager, Member, University Professor, PM Moderator Posts: 2,959 Community Manager

    hello @websiteguy (great screenname, BTW) - sure happy to help. Can you please post your XML and dataset so I can take a look?

     

    Scott

     

  • Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,760 Unicorn
Sign In or Register to comment.