# blob: 73753eb526a1454be3f7dfeb3006ba8b64e80f98 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0.html
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script tests pushing a LARGE Spark DataFrame into Elasticsearch.
Each row of the DataFrame becomes one document in Elasticsearch.
"""
# Reza
import os
import json
from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext
from pyspark.sql import HiveContext
from pyspark.sql.functions import udf, expr, collect_list, struct
from pyspark.sql.types import StringType, ArrayType, MapType, FloatType, StructField, StructType
'''
spark-submit --master yarn --num-executors 10 --executor-cores 5 --executor-memory 32G --driver-memory 32G --jars elasticsearch-hadoop-6.8.0.jar scripts/test_spark_es_big_write.py
'''
MDOC = {
"a": "5",
"g": "g_m",
"m": "native",
"predictions": {
"2020-06-21": {
"hours": [
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.52906338707,
"total": 0.52906338707
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.326346659629,
"total": 0.326346659629
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.2899849149,
"total": 0.2899849149
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.263622649876,
"total": 0.263622649876
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.301802481923,
"total": 0.301802481923
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.494519729477,
"total": 0.494519729477
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.931769710793,
"total": 0.931769710793
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.18902905524,
"total": 1.18902905524
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.19630140433,
"total": 1.19630140433
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.965404324726,
"total": 0.965404324726
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.805412647628,
"total": 0.805412647628
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.807230734765,
"total": 0.807230734765
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.884499442518,
"total": 0.884499442518
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.789958905968,
"total": 0.789958905968
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.768141859058,
"total": 0.768141859058
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.769050902717,
"total": 0.769050902717
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.839956305221,
"total": 0.839956305221
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.985403284318,
"total": 0.985403284318
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.02085598557,
"total": 1.02085598557
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.96994954284,
"total": 0.96994954284
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.940860147021,
"total": 0.940860147021
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.879045180745,
"total": 0.879045180745
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.729962027011,
"total": 0.729962027011
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.528154343411,
"total": 0.528154343411
}
],
"date": "2020-06-21"
},
"2020-06-22": {
"hours": [
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.57477844577,
"total": 0.57477844577
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.354545467306,
"total": 0.354545467306
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.315041794152,
"total": 0.315041794152
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.286401631011,
"total": 0.286401631011
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.327880487912,
"total": 0.327880487912
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.537249956164,
"total": 0.537249956164
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.01228162688,
"total": 1.01228162688
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.29177011497,
"total": 1.29177011497
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.29967084976,
"total": 1.29967084976
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.04882252461,
"total": 1.04882252461
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.875006362415,
"total": 0.875006362415
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.876981545964,
"total": 0.876981545964
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.960926851639,
"total": 0.960926851639
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.858217301161,
"total": 0.858217301161
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.834515097189,
"total": 0.834515097189
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.835502689063,
"total": 0.835502689063
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.91253485202,
"total": 0.91253485202
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.07054954483,
"total": 1.07054954483
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.10906562631,
"total": 1.10906562631
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.05376048378,
"total": 1.05376048378
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.02215754522,
"total": 1.02215754522
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.955001300597,
"total": 0.955001300597
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.793036240288,
"total": 0.793036240288
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.573790853896,
"total": 0.573790853896
}
],
"date": "2020-06-22"
},
"2020-06-23": {
"hours": [
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.559059800944,
"total": 0.559059800944
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.344849602202,
"total": 0.344849602202
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.306426248277,
"total": 0.306426248277
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.278569316581,
"total": 0.278569316581
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.318913838289,
"total": 0.318913838289
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.52255761461,
"total": 0.52255761461
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.984598446562,
"total": 0.984598446562
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.2564436761,
"total": 1.2564436761
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.26412834703,
"total": 1.26412834703
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.02014004901,
"total": 1.02014004901
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.851077291428,
"total": 0.851077291428
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.852998459018,
"total": 0.852998459018
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.934648086325,
"total": 0.934648086325
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.834747365851,
"total": 0.834747365851
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.811693353419,
"total": 0.811693353419
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.812653937311,
"total": 0.812653937311
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.887579477762,
"total": 0.887579477762
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.04127289365,
"total": 1.04127289365
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.07873566388,
"total": 1.07873566388
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.02494296827,
"total": 1.02494296827
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.994204285092,
"total": 0.994204285092
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.928884583169,
"total": 0.928884583169
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.771348831711,
"total": 0.771348831711
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.558099217053,
"total": 0.558099217053
}
],
"date": "2020-06-23"
},
"2020-06-24": {
"hours": [
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.553689263963,
"total": 0.553689263963
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.341536848292,
"total": 0.341536848292
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.303482603437,
"total": 0.303482603437
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.275893275817,
"total": 0.275893275817
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.315850233,
"total": 0.315850233
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.517537731246,
"total": 0.517537731246
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.975140026621,
"total": 0.975140026621
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.24437380948,
"total": 1.24437380948
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.2519846586,
"total": 1.2519846586
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.01034020317,
"total": 1.01034020317
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.842901525507,
"total": 0.842901525507
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.844804237645,
"total": 0.844804237645
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.925669508176,
"total": 0.925669508176
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.826728471287,
"total": 0.826728471287
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.803895924298,
"total": 0.803895924298
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.804847280462,
"total": 0.804847280462
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.879053058224,
"total": 0.879053058224
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.03127003783,
"total": 1.03127003783
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.06837292672,
"total": 1.06837292672
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.0150969838,
"total": 1.0150969838
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.984653587882,
"total": 0.984653587882
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.919961371381,
"total": 0.919961371381
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.763938967114,
"total": 0.763938967114
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.552737907798,
"total": 0.552737907798
}
],
"date": "2020-06-24"
},
"2020-06-25": {
"hours": [
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.546877851205,
"total": 0.546877851205
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.337335306747,
"total": 0.337335306747
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.299749200224,
"total": 0.299749200224
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.272499272897,
"total": 0.272499272897
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.31196468483,
"total": 0.31196468483
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.511171049906,
"total": 0.511171049906
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.963143981818,
"total": 0.963143981818
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.22906568597,
"total": 1.22906568597
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.23658290742,
"total": 1.23658290742
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.997911130412,
"total": 0.997911130412
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.832532261413,
"total": 0.832532261413
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.834411566636,
"total": 0.834411566636
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.914282043207,
"total": 0.914282043207
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.816558165986,
"total": 0.816558165986
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.794006501997,
"total": 0.794006501997
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.794946154703,
"total": 0.794946154703
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.868239062713,
"total": 0.868239062713
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.01858348899,
"total": 1.01858348899
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.05522994299,
"total": 1.05522994299
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.00260939375,
"total": 1.00260939375
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.972540508495,
"total": 0.972540508495
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.908644127163,
"total": 0.908644127163
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.754541090064,
"total": 0.754541090064
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.5459381985,
"total": 0.5459381985
}
],
"date": "2020-06-25"
},
"2020-06-26": {
"hours": [
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.52068010983,
"total": 0.52068010983
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.321175531573,
"total": 0.321175531573
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.2853899571,
"total": 0.2853899571
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.259445415513,
"total": 0.259445415513
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.29702026879,
"total": 0.29702026879
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.486683813982,
"total": 0.486683813982
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.917005347958,
"total": 0.917005347958
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.17018828784,
"total": 1.17018828784
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.17734540287,
"total": 1.17734540287
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.950107004404,
"total": 0.950107004404
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.792650476435,
"total": 0.792650476435
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.79443975506,
"total": 0.79443975506
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.870484101017,
"total": 0.870484101017
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.777441607136,
"total": 0.777441607136
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.755970262381,
"total": 0.755970262381
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.756864901783,
"total": 0.756864901783
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.826646772283,
"total": 0.826646772283
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.969789070355,
"total": 0.969789070355
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.00468000561,
"total": 1.00468000561
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.954580201235,
"total": 0.954580201235
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.925951741621,
"total": 0.925951741621
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.865116264784,
"total": 0.865116264784
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.718395409103,
"total": 0.718395409103
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.519785470428,
"total": 0.519785470428
}
],
"date": "2020-06-26"
},
"2020-06-27": {
"hours": [
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.48334832837,
"total": 0.48334832837
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.298147851951,
"total": 0.298147851951
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.264928035648,
"total": 0.264928035648
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.24084366874,
"total": 0.24084366874
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.275724475934,
"total": 0.275724475934
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.451789502791,
"total": 0.451789502791
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.851257794708,
"total": 0.851257794708
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.0862879955,
"total": 1.0862879955
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.09293195889,
"total": 1.09293195889
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.881986124843,
"total": 0.881986124843
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.735818932842,
"total": 0.735818932842
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.737479923565,
"total": 0.737479923565
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.808072033397,
"total": 0.808072033397
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.721700510776,
"total": 0.721700510776
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.701768620927,
"total": 0.701768620927
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.702599116372,
"total": 0.702599116372
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.767377758422,
"total": 0.767377758422
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.900257023801,
"total": 0.900257023801
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.932646344826,
"total": 0.932646344826
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.886138601902,
"total": 0.886138601902
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.859562748826,
"total": 0.859562748826
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.803089060894,
"total": 0.803089060894
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.666887813734,
"total": 0.666887813734
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.482517832925,
"total": 0.482517832925
}
],
"date": "2020-06-27"
},
"2020-06-28": {
"hours": [
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.516619459917,
"total": 0.516619459917
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.318670766421,
"total": 0.318670766421
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.283164274416,
"total": 0.283164274416
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.257422067618,
"total": 0.257422067618
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.294703884304,
"total": 0.294703884304
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.482888292414,
"total": 0.482888292414
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.90985385971,
"total": 0.90985385971
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.16106229112,
"total": 1.16106229112
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.16816358967,
"total": 1.16816358967
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.942697364873,
"total": 0.942697364873
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.786468799764,
"total": 0.786468799764
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.788244124266,
"total": 0.788244124266
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.863695419978,
"total": 0.863695419978
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.771378540515,
"total": 0.771378540515
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.75007464524,
"total": 0.75007464524
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.75096230758,
"total": 0.75096230758
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.820199967267,
"total": 0.820199967267
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.962225935467,
"total": 0.962225935467
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.99684476531,
"total": 0.99684476531
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.947135676396,
"total": 0.947135676396
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.918730482756,
"total": 0.918730482756
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.858369446115,
"total": 0.858369446115
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.712792828554,
"total": 0.712792828554
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.515731797577,
"total": 0.515731797577
}
],
"date": "2020-06-28"
},
"2020-06-29": {
"hours": [
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.484789204146,
"total": 0.484789204146
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.299036639585,
"total": 0.299036639585
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.265717794019,
"total": 0.265717794019
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.241561630896,
"total": 0.241561630896
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.276546418816,
"total": 0.276546418816
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.453136300767,
"total": 0.453136300767
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.85379541957,
"total": 0.85379541957
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.0895262524,
"total": 1.0895262524
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.09619002164,
"total": 1.09619002164
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.884615351773,
"total": 0.884615351773
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.738012431016,
"total": 0.738012431016
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.739678373202,
"total": 0.739678373202
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.810480920218,
"total": 0.810480920218
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.723851921512,
"total": 0.723851921512
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.703860614106,
"total": 0.703860614106
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.704693585283,
"total": 0.704693585283
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.769665334395,
"total": 0.769665334395
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.902940716826,
"total": 0.902940716826
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.935426591383,
"total": 0.935426591383
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.88878020749,
"total": 0.88878020749
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.862125131004,
"total": 0.862125131004
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.805483093324,
"total": 0.805483093324
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.668875826187,
"total": 0.668875826187
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.483956232969,
"total": 0.483956232969
}
],
"date": "2020-06-29"
},
"2020-06-30": {
"hours": [
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.494744345869,
"total": 0.494744345869
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.305177354151,
"total": 0.305177354151
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.271174306407,
"total": 0.271174306407
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.246522096702,
"total": 0.246522096702
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.282225296911,
"total": 0.282225296911
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.462441450418,
"total": 0.462441450418
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.871328100437,
"total": 0.871328100437
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.11189966369,
"total": 1.11189966369
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 1.11870027337,
"total": 1.11870027337
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.902780919656,
"total": 0.902780919656
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.753167509307,
"total": 0.753167509307
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.754867661601,
"total": 0.754867661601
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.82712413825,
"total": 0.82712413825
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.738716213875,
"total": 0.738716213875
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.71831438516,
"total": 0.71831438516
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.719164461392,
"total": 0.719164461392
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.785470404758,
"total": 0.785470404758
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.921482595907,
"total": 0.921482595907
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.95463556759,
"total": 0.95463556759
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.907031300646,
"total": 0.907031300646
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.879828862416,
"total": 0.879828862416
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.822023681029,
"total": 0.822023681029
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.682611184952,
"total": 0.682611184952
},
{
"h2": 0.0,
"h3": 0.0,
"h0": 0.0,
"h1": 0.493894269637,
"total": 0.493894269637
}
],
"date": "2020-06-30"
}
},
"ipl": "40",
"si": "66bcd2720e5011e79bc8fa163e05184e",
"r": "7",
"t": "WIFI",
"pm": "CPC",
"uckey": 0
}
# Hive table shared by the two steps of this script: write_data_into_table()
# saves the generated rows into it and test_push() selects them back out.
TABLE_NAME = 'reza_big_data_test'
def write_data_into_table(sc, num_batches=10, batch_size=100000):
    """Fill the Hive table TABLE_NAME with synthetic test documents.

    Rows are produced batch by batch; the row list is rebuilt for every batch
    and each batch is written as its own DataFrame. Every row is a
    ``(uckey, ucdoc)`` pair: ``uckey`` is a running counter rendered as a
    string (the first row is "1") and ``ucdoc`` is the JSON text of MDOC with
    its ``uckey`` field set to that same string.

    The first batch overwrites the table, all later batches are appended.

    :param sc: SparkContext used to create the HiveContext.
    :param num_batches: number of batches to write (default 10).
    :param batch_size: number of rows generated per batch (default 100000).
    """
    hive_context = HiveContext(sc)
    next_key = 0
    for batch_index in range(num_batches):
        rows = []
        for _ in range(batch_size):
            next_key += 1
            uckey = str(next_key)
            # Build a per-row copy instead of assigning into MDOC, so the
            # module-level template is not left modified after this call.
            doc = dict(MDOC, uckey=uckey)
            rows.append((uckey, json.dumps(doc)))
        df = hive_context.createDataFrame(rows, ['uckey', 'ucdoc'])
        # Only the very first batch may replace an existing table.
        write_mode = 'overwrite' if batch_index == 0 else 'append'
        (df.write
            .option("header", "true")
            .option("encoding", "UTF-8")
            .mode(write_mode)
            .format('hive')
            .saveAsTable(TABLE_NAME))
def test_push(sc, es_write_conf):
    """Read all rows of TABLE_NAME and write them out via EsOutputFormat.

    Each selected ``(uckey, ucdoc)`` row is converted into a
    ``(uckey, json_text)`` pair, where ``json_text`` encodes an object with a
    ``uckey`` field and a ``ucdoc`` field holding the parsed stored document.
    The resulting RDD is saved with ``saveAsNewAPIHadoopFile`` using the
    elasticsearch-hadoop output format and the supplied configuration.

    :param sc: SparkContext used to create the HiveContext.
    :param es_write_conf: dict passed unchanged as the Hadoop job ``conf``.
    """
    doc_field = 'ucdoc'

    def to_es_pair(row):
        # Parse the stored JSON text so the document is nested as an object
        # under doc_field before the whole record is serialised again.
        key = row[0]
        payload = {'uckey': key, doc_field: json.loads(row[1])}
        return (key, json.dumps(payload))

    query = "SELECT uckey,ucdoc FROM {}".format(TABLE_NAME)
    source_rows = HiveContext(sc).sql(query).rdd
    source_rows.map(to_es_pair).saveAsNewAPIHadoopFile(
        path='-',
        outputFormatClass="org.elasticsearch.hadoop.mr.EsOutputFormat",
        keyClass="org.apache.hadoop.io.NullWritable",
        valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
        conf=es_write_conf)
if __name__ == '__main__':
    sc = SparkContext()
    # Settings handed to the elasticsearch-hadoop connector: target node and
    # index/type, bulk batch limits, and the write behaviour (records are
    # supplied as JSON text, identified by their "uckey" field, and upserted).
    es_write_conf = {"es.nodes": '10.213.37.41', "es.port": '9200',
                     "es.resource": 'reza_spark_es_test/doc',
                     "es.batch.size.bytes": "1000000",
                     "es.batch.size.entries": "1000",
                     "es.input.json": "yes", "es.mapping.id": "uckey",
                     "es.nodes.wan.only": "true", "es.write.operation": "upsert"}
    try:
        # One-off setup step: enable to (re)create the Hive table before pushing.
        # write_data_into_table(sc)
        test_push(sc=sc, es_write_conf=es_write_conf)
    finally:
        # Always shut the Spark application down, even when the push fails.
        sc.stop()