| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| |
| # http://www.apache.org/licenses/LICENSE-2.0.html |
| |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """ |
This script tests pushing a LARGE Spark DataFrame into Elasticsearch.
Each row of the DataFrame becomes one document in Elasticsearch.
| """ |
| # Reza |
| |
| import os |
| import json |
| |
| from pyspark import SparkContext, SparkConf |
| from pyspark.sql import SQLContext |
| from pyspark.sql import HiveContext |
| |
| from pyspark.sql.functions import udf, expr, collect_list, struct |
| from pyspark.sql.types import StringType, ArrayType, MapType, FloatType, StructField, StructType |
| |
| ''' |
Example invocation (the elasticsearch-hadoop jar must be passed via --jars):
spark-submit --master yarn --num-executors 10 --executor-cores 5 --executor-memory 32G --driver-memory 32G --jars elasticsearch-hadoop-6.8.0.jar scripts/test_spark_es_big_write.py
| ''' |
| |
| MDOC = { |
| "a": "5", |
| "g": "g_m", |
| "m": "native", |
| "predictions": { |
| "2020-06-21": { |
| "hours": [ |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.52906338707, |
| "total": 0.52906338707 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.326346659629, |
| "total": 0.326346659629 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.2899849149, |
| "total": 0.2899849149 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.263622649876, |
| "total": 0.263622649876 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.301802481923, |
| "total": 0.301802481923 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.494519729477, |
| "total": 0.494519729477 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.931769710793, |
| "total": 0.931769710793 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.18902905524, |
| "total": 1.18902905524 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.19630140433, |
| "total": 1.19630140433 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.965404324726, |
| "total": 0.965404324726 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.805412647628, |
| "total": 0.805412647628 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.807230734765, |
| "total": 0.807230734765 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.884499442518, |
| "total": 0.884499442518 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.789958905968, |
| "total": 0.789958905968 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.768141859058, |
| "total": 0.768141859058 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.769050902717, |
| "total": 0.769050902717 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.839956305221, |
| "total": 0.839956305221 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.985403284318, |
| "total": 0.985403284318 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.02085598557, |
| "total": 1.02085598557 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.96994954284, |
| "total": 0.96994954284 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.940860147021, |
| "total": 0.940860147021 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.879045180745, |
| "total": 0.879045180745 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.729962027011, |
| "total": 0.729962027011 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.528154343411, |
| "total": 0.528154343411 |
| } |
| ], |
| "date": "2020-06-21" |
| }, |
| "2020-06-22": { |
| "hours": [ |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.57477844577, |
| "total": 0.57477844577 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.354545467306, |
| "total": 0.354545467306 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.315041794152, |
| "total": 0.315041794152 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.286401631011, |
| "total": 0.286401631011 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.327880487912, |
| "total": 0.327880487912 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.537249956164, |
| "total": 0.537249956164 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.01228162688, |
| "total": 1.01228162688 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.29177011497, |
| "total": 1.29177011497 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.29967084976, |
| "total": 1.29967084976 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.04882252461, |
| "total": 1.04882252461 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.875006362415, |
| "total": 0.875006362415 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.876981545964, |
| "total": 0.876981545964 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.960926851639, |
| "total": 0.960926851639 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.858217301161, |
| "total": 0.858217301161 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.834515097189, |
| "total": 0.834515097189 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.835502689063, |
| "total": 0.835502689063 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.91253485202, |
| "total": 0.91253485202 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.07054954483, |
| "total": 1.07054954483 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.10906562631, |
| "total": 1.10906562631 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.05376048378, |
| "total": 1.05376048378 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.02215754522, |
| "total": 1.02215754522 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.955001300597, |
| "total": 0.955001300597 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.793036240288, |
| "total": 0.793036240288 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.573790853896, |
| "total": 0.573790853896 |
| } |
| ], |
| "date": "2020-06-22" |
| }, |
| "2020-06-23": { |
| "hours": [ |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.559059800944, |
| "total": 0.559059800944 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.344849602202, |
| "total": 0.344849602202 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.306426248277, |
| "total": 0.306426248277 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.278569316581, |
| "total": 0.278569316581 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.318913838289, |
| "total": 0.318913838289 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.52255761461, |
| "total": 0.52255761461 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.984598446562, |
| "total": 0.984598446562 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.2564436761, |
| "total": 1.2564436761 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.26412834703, |
| "total": 1.26412834703 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.02014004901, |
| "total": 1.02014004901 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.851077291428, |
| "total": 0.851077291428 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.852998459018, |
| "total": 0.852998459018 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.934648086325, |
| "total": 0.934648086325 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.834747365851, |
| "total": 0.834747365851 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.811693353419, |
| "total": 0.811693353419 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.812653937311, |
| "total": 0.812653937311 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.887579477762, |
| "total": 0.887579477762 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.04127289365, |
| "total": 1.04127289365 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.07873566388, |
| "total": 1.07873566388 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.02494296827, |
| "total": 1.02494296827 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.994204285092, |
| "total": 0.994204285092 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.928884583169, |
| "total": 0.928884583169 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.771348831711, |
| "total": 0.771348831711 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.558099217053, |
| "total": 0.558099217053 |
| } |
| ], |
| "date": "2020-06-23" |
| }, |
| "2020-06-24": { |
| "hours": [ |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.553689263963, |
| "total": 0.553689263963 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.341536848292, |
| "total": 0.341536848292 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.303482603437, |
| "total": 0.303482603437 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.275893275817, |
| "total": 0.275893275817 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.315850233, |
| "total": 0.315850233 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.517537731246, |
| "total": 0.517537731246 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.975140026621, |
| "total": 0.975140026621 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.24437380948, |
| "total": 1.24437380948 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.2519846586, |
| "total": 1.2519846586 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.01034020317, |
| "total": 1.01034020317 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.842901525507, |
| "total": 0.842901525507 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.844804237645, |
| "total": 0.844804237645 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.925669508176, |
| "total": 0.925669508176 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.826728471287, |
| "total": 0.826728471287 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.803895924298, |
| "total": 0.803895924298 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.804847280462, |
| "total": 0.804847280462 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.879053058224, |
| "total": 0.879053058224 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.03127003783, |
| "total": 1.03127003783 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.06837292672, |
| "total": 1.06837292672 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.0150969838, |
| "total": 1.0150969838 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.984653587882, |
| "total": 0.984653587882 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.919961371381, |
| "total": 0.919961371381 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.763938967114, |
| "total": 0.763938967114 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.552737907798, |
| "total": 0.552737907798 |
| } |
| ], |
| "date": "2020-06-24" |
| }, |
| "2020-06-25": { |
| "hours": [ |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.546877851205, |
| "total": 0.546877851205 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.337335306747, |
| "total": 0.337335306747 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.299749200224, |
| "total": 0.299749200224 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.272499272897, |
| "total": 0.272499272897 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.31196468483, |
| "total": 0.31196468483 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.511171049906, |
| "total": 0.511171049906 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.963143981818, |
| "total": 0.963143981818 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.22906568597, |
| "total": 1.22906568597 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.23658290742, |
| "total": 1.23658290742 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.997911130412, |
| "total": 0.997911130412 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.832532261413, |
| "total": 0.832532261413 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.834411566636, |
| "total": 0.834411566636 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.914282043207, |
| "total": 0.914282043207 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.816558165986, |
| "total": 0.816558165986 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.794006501997, |
| "total": 0.794006501997 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.794946154703, |
| "total": 0.794946154703 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.868239062713, |
| "total": 0.868239062713 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.01858348899, |
| "total": 1.01858348899 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.05522994299, |
| "total": 1.05522994299 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.00260939375, |
| "total": 1.00260939375 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.972540508495, |
| "total": 0.972540508495 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.908644127163, |
| "total": 0.908644127163 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.754541090064, |
| "total": 0.754541090064 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.5459381985, |
| "total": 0.5459381985 |
| } |
| ], |
| "date": "2020-06-25" |
| }, |
| "2020-06-26": { |
| "hours": [ |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.52068010983, |
| "total": 0.52068010983 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.321175531573, |
| "total": 0.321175531573 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.2853899571, |
| "total": 0.2853899571 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.259445415513, |
| "total": 0.259445415513 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.29702026879, |
| "total": 0.29702026879 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.486683813982, |
| "total": 0.486683813982 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.917005347958, |
| "total": 0.917005347958 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.17018828784, |
| "total": 1.17018828784 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.17734540287, |
| "total": 1.17734540287 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.950107004404, |
| "total": 0.950107004404 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.792650476435, |
| "total": 0.792650476435 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.79443975506, |
| "total": 0.79443975506 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.870484101017, |
| "total": 0.870484101017 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.777441607136, |
| "total": 0.777441607136 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.755970262381, |
| "total": 0.755970262381 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.756864901783, |
| "total": 0.756864901783 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.826646772283, |
| "total": 0.826646772283 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.969789070355, |
| "total": 0.969789070355 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.00468000561, |
| "total": 1.00468000561 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.954580201235, |
| "total": 0.954580201235 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.925951741621, |
| "total": 0.925951741621 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.865116264784, |
| "total": 0.865116264784 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.718395409103, |
| "total": 0.718395409103 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.519785470428, |
| "total": 0.519785470428 |
| } |
| ], |
| "date": "2020-06-26" |
| }, |
| "2020-06-27": { |
| "hours": [ |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.48334832837, |
| "total": 0.48334832837 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.298147851951, |
| "total": 0.298147851951 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.264928035648, |
| "total": 0.264928035648 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.24084366874, |
| "total": 0.24084366874 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.275724475934, |
| "total": 0.275724475934 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.451789502791, |
| "total": 0.451789502791 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.851257794708, |
| "total": 0.851257794708 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.0862879955, |
| "total": 1.0862879955 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.09293195889, |
| "total": 1.09293195889 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.881986124843, |
| "total": 0.881986124843 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.735818932842, |
| "total": 0.735818932842 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.737479923565, |
| "total": 0.737479923565 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.808072033397, |
| "total": 0.808072033397 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.721700510776, |
| "total": 0.721700510776 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.701768620927, |
| "total": 0.701768620927 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.702599116372, |
| "total": 0.702599116372 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.767377758422, |
| "total": 0.767377758422 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.900257023801, |
| "total": 0.900257023801 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.932646344826, |
| "total": 0.932646344826 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.886138601902, |
| "total": 0.886138601902 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.859562748826, |
| "total": 0.859562748826 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.803089060894, |
| "total": 0.803089060894 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.666887813734, |
| "total": 0.666887813734 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.482517832925, |
| "total": 0.482517832925 |
| } |
| ], |
| "date": "2020-06-27" |
| }, |
| "2020-06-28": { |
| "hours": [ |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.516619459917, |
| "total": 0.516619459917 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.318670766421, |
| "total": 0.318670766421 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.283164274416, |
| "total": 0.283164274416 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.257422067618, |
| "total": 0.257422067618 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.294703884304, |
| "total": 0.294703884304 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.482888292414, |
| "total": 0.482888292414 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.90985385971, |
| "total": 0.90985385971 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.16106229112, |
| "total": 1.16106229112 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.16816358967, |
| "total": 1.16816358967 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.942697364873, |
| "total": 0.942697364873 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.786468799764, |
| "total": 0.786468799764 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.788244124266, |
| "total": 0.788244124266 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.863695419978, |
| "total": 0.863695419978 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.771378540515, |
| "total": 0.771378540515 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.75007464524, |
| "total": 0.75007464524 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.75096230758, |
| "total": 0.75096230758 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.820199967267, |
| "total": 0.820199967267 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.962225935467, |
| "total": 0.962225935467 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.99684476531, |
| "total": 0.99684476531 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.947135676396, |
| "total": 0.947135676396 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.918730482756, |
| "total": 0.918730482756 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.858369446115, |
| "total": 0.858369446115 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.712792828554, |
| "total": 0.712792828554 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.515731797577, |
| "total": 0.515731797577 |
| } |
| ], |
| "date": "2020-06-28" |
| }, |
| "2020-06-29": { |
| "hours": [ |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.484789204146, |
| "total": 0.484789204146 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.299036639585, |
| "total": 0.299036639585 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.265717794019, |
| "total": 0.265717794019 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.241561630896, |
| "total": 0.241561630896 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.276546418816, |
| "total": 0.276546418816 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.453136300767, |
| "total": 0.453136300767 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.85379541957, |
| "total": 0.85379541957 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.0895262524, |
| "total": 1.0895262524 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.09619002164, |
| "total": 1.09619002164 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.884615351773, |
| "total": 0.884615351773 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.738012431016, |
| "total": 0.738012431016 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.739678373202, |
| "total": 0.739678373202 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.810480920218, |
| "total": 0.810480920218 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.723851921512, |
| "total": 0.723851921512 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.703860614106, |
| "total": 0.703860614106 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.704693585283, |
| "total": 0.704693585283 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.769665334395, |
| "total": 0.769665334395 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.902940716826, |
| "total": 0.902940716826 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.935426591383, |
| "total": 0.935426591383 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.88878020749, |
| "total": 0.88878020749 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.862125131004, |
| "total": 0.862125131004 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.805483093324, |
| "total": 0.805483093324 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.668875826187, |
| "total": 0.668875826187 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.483956232969, |
| "total": 0.483956232969 |
| } |
| ], |
| "date": "2020-06-29" |
| }, |
| "2020-06-30": { |
| "hours": [ |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.494744345869, |
| "total": 0.494744345869 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.305177354151, |
| "total": 0.305177354151 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.271174306407, |
| "total": 0.271174306407 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.246522096702, |
| "total": 0.246522096702 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.282225296911, |
| "total": 0.282225296911 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.462441450418, |
| "total": 0.462441450418 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.871328100437, |
| "total": 0.871328100437 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.11189966369, |
| "total": 1.11189966369 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 1.11870027337, |
| "total": 1.11870027337 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.902780919656, |
| "total": 0.902780919656 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.753167509307, |
| "total": 0.753167509307 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.754867661601, |
| "total": 0.754867661601 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.82712413825, |
| "total": 0.82712413825 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.738716213875, |
| "total": 0.738716213875 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.71831438516, |
| "total": 0.71831438516 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.719164461392, |
| "total": 0.719164461392 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.785470404758, |
| "total": 0.785470404758 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.921482595907, |
| "total": 0.921482595907 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.95463556759, |
| "total": 0.95463556759 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.907031300646, |
| "total": 0.907031300646 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.879828862416, |
| "total": 0.879828862416 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.822023681029, |
| "total": 0.822023681029 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.682611184952, |
| "total": 0.682611184952 |
| }, |
| { |
| "h2": 0.0, |
| "h3": 0.0, |
| "h0": 0.0, |
| "h1": 0.493894269637, |
| "total": 0.493894269637 |
| } |
| ], |
| "date": "2020-06-30" |
| } |
| }, |
| "ipl": "40", |
| "si": "66bcd2720e5011e79bc8fa163e05184e", |
| "r": "7", |
| "t": "WIFI", |
| "pm": "CPC", |
| "uckey": 0 |
| } |
| |
# Hive table shared by both steps of this script: write_data_into_table()
# saves the generated rows into it and test_push() selects them back out.
TABLE_NAME = 'reza_big_data_test'
| |
| |
def write_data_into_table(sc, num_batches=10, batch_size=100000):
    """Fill the Hive table TABLE_NAME with synthetic test documents.

    Writes ``num_batches * batch_size`` rows (1,000,000 with the defaults).
    Every row is a ``(uckey, ucdoc)`` pair: ``uckey`` is a 1-based running
    counter rendered as a string, and ``ucdoc`` is the MDOC sample document
    serialized to JSON with its ``uckey`` field set to that same value.
    The first batch overwrites the table, every later batch appends to it.

    :param sc: SparkContext used to create the HiveContext.
    :param num_batches: how many DataFrames are built and saved in sequence.
    :param batch_size: rows built in driver memory for each DataFrame.
    :raises ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    hive_context = HiveContext(sc)

    # Work on a shallow copy of the template: only the top-level 'uckey'
    # entry changes per row, and the module-level MDOC must stay untouched.
    doc = dict(MDOC)

    key = 0
    for batch in range(num_batches):
        # Rows are built batch by batch so the driver never holds more than
        # batch_size serialized documents at once.
        rows = []
        for _ in range(batch_size):
            key += 1
            uckey = str(key)
            doc['uckey'] = uckey
            rows.append((uckey, json.dumps(doc)))

        df = hive_context.createDataFrame(rows, ['uckey', 'ucdoc'])

        # The first batch replaces whatever the table held before; the
        # remaining batches extend it.
        mode = 'overwrite' if batch == 0 else 'append'
        writer = df.write.option("header", "true").option("encoding", "UTF-8")
        writer.mode(mode).format('hive').saveAsTable(TABLE_NAME)
| |
| |
def test_push(sc, es_write_conf):
    """Push every row of the Hive table TABLE_NAME into Elasticsearch.

    Selects the ``uckey`` and ``ucdoc`` columns, turns each row into a
    ``(uckey, json_string)`` pair and saves the pairs through the
    elasticsearch-hadoop ``EsOutputFormat``.

    :param sc: SparkContext used to create the HiveContext.
    :param es_write_conf: dict of settings passed as ``conf`` to
        ``saveAsNewAPIHadoopFile``.
    """

    def to_es_pair(row, field_name='ucdoc'):
        # row[1] holds the document as a JSON string; parse it so that it is
        # embedded as a nested object rather than as an escaped string.
        key = row[0]
        wrapped = {'uckey': key, field_name: json.loads(row[1])}
        return (key, json.dumps(wrapped))

    query = "SELECT uckey,ucdoc FROM {}".format(TABLE_NAME)
    source_rows = HiveContext(sc).sql(query).rdd
    es_pairs = source_rows.map(to_es_pair)

    # NOTE(review): path='-' is not a real location; presumably the output
    # format ignores it and takes its target from es_write_conf - confirm.
    es_pairs.saveAsNewAPIHadoopFile(
        path='-',
        outputFormatClass="org.elasticsearch.hadoop.mr.EsOutputFormat",
        keyClass="org.apache.hadoop.io.NullWritable",
        valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
        conf=es_write_conf,
    )
| |
| |
if __name__ == '__main__':

    sc = SparkContext()

    # Settings handed to the elasticsearch-hadoop output format by test_push().
    es_write_conf = {}

    # Target cluster and index/type.
    es_write_conf["es.nodes"] = '10.213.37.41'
    es_write_conf["es.port"] = '9200'
    es_write_conf["es.nodes.wan.only"] = "true"
    es_write_conf["es.resource"] = 'reza_spark_es_test/doc'

    # Bulk request limits.
    es_write_conf["es.batch.size.bytes"] = "1000000"
    es_write_conf["es.batch.size.entries"] = "1000"

    # test_push() emits each value as a JSON string that carries a 'uckey' field.
    es_write_conf["es.input.json"] = "yes"
    es_write_conf["es.mapping.id"] = "uckey"
    es_write_conf["es.write.operation"] = "upsert"

    # Uncomment to (re)generate the Hive source table before pushing:
    # write_data_into_table(sc)
    test_push(sc=sc, es_write_conf=es_write_conf)