奔跑吧的数据 2022-09-25 20:27 采纳率: 0%
浏览 104
已结题

DATAX 批量生成josn脚本 ,类似于mysql的,sqlserver怎么编写和mysql脚本对比哪些需要修改。需要安装什么软件和python建立起连接?

DATAX 批量生成josn脚本 ,类似于mysql的,sqlserver怎么编写和mysql脚本对比哪些需要修改。需要安装什么软件和python建立起连接?

# ecoding=utf-8
import json
import getopt
import os
import sys
import MySQLdb

#MySQL相关配置,需根据实际情况作出修改
mysql_host = "hadoop102"
mysql_port = "3306"
mysql_user = "root"
mysql_passwd = "000000"

#HDFS NameNode相关配置,需根据实际情况作出修改
hdfs_nn_host = "hadoop102"
hdfs_nn_port = "8020"

#生成配置文件的目标路径,可根据实际情况作出修改
output_path = "/opt/module/datax/job/import"


def get_connection():
    return MySQLdb.connect(host=mysql_host, port=int(mysql_port), user=mysql_user, passwd=mysql_passwd)


def get_mysql_meta(database, table):
    connection = get_connection()
    cursor = connection.cursor()
    sql = "SELECT COLUMN_NAME,DATA_TYPE from information_schema.COLUMNS WHERE TABLE_SCHEMA=%s AND TABLE_NAME=%s ORDER BY ORDINAL_POSITION"
    cursor.execute(sql, [database, table])
    fetchall = cursor.fetchall()
    cursor.close()
    connection.close()
    return fetchall


def get_mysql_columns(database, table):
    return map(lambda x: x[0], get_mysql_meta(database, table))


def get_hive_columns(database, table):
    def type_mapping(mysql_type):
        mappings = {
            "bigint": "bigint",
            "int": "bigint",
            "smallint": "bigint",
            "tinyint": "bigint",
            "decimal": "string",
            "double": "double",
            "float": "float",
            "binary": "string",
            "char": "string",
            "varchar": "string",
            "datetime": "string",
            "time": "string",
            "timestamp": "string",
            "date": "string",
            "text": "string"
        }
        return mappings[mysql_type]

    meta = get_mysql_meta(database, table)
    return map(lambda x: {"name": x[0], "type": type_mapping(x[1].lower())}, meta)


def generate_json(source_database, source_table):
    job = {
        "job": {
            "setting": {
                "speed": {
                    "channel": 3
                },
                "errorLimit": {
                    "record": 0,
                    "percentage": 0.02
                }
            },
            "content": [{
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": mysql_user,
                        "password": mysql_passwd,
                        "column": get_mysql_columns(source_database, source_table),
                        "splitPk": "",
                        "connection": [{
                            "table": [source_table],
                            "jdbcUrl": ["jdbc:mysql://" + mysql_host + ":" + mysql_port + "/" + source_database]
                        }]
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "defaultFS": "hdfs://" + hdfs_nn_host + ":" + hdfs_nn_port,
                        "fileType": "text",
                        "path": "${targetdir}",
                        "fileName": source_table,
                        "column": get_hive_columns(source_database, source_table),
                        "writeMode": "append",
                        "fieldDelimiter": "\t",
                        "compress": "gzip"
                    }
                }
            }]
        }
    }
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    with open(os.path.join(output_path, ".".join([source_database, source_table, "json"])), "w") as f:
        json.dump(job, f)


def main(args):
    source_database = ""
    source_table = ""

    options, arguments = getopt.getopt(args, '-d:-t:', ['sourcedb=', 'sourcetbl='])
    for opt_name, opt_value in options:

  • 写回答

1条回答 默认 最新

  • chuifengde 2022-09-26 08:58
    关注
    获得7.50元问题酬金

    装上pymssql

    要改的部分如下:

    import pymssql
    
    server = 'localhost'
    user = 'sa' 
    password = 'XXXX'
    
    
    def get_connection():
        return pymssql.connect(server, user, password, "库名")
    
    def get_mysql_meta(database, table):
        connection = get_connection()
        cursor = connection.cursor()
        sql = "SELECT COLUMN_NAME,DATA_TYPE from information_schema.COLUMNS WHERE TABLE_CATALOG=%s AND TABLE_NAME=%s ORDER BY ORDINAL_POSITION"
        cursor.execute(sql, (database, table))
        fetchall = cursor.fetchall()
        cursor.close()
        connection.close()
        return fetchall
    
    调用:
    res = get_mysql_columns('库名', '表名')
    print(list(res))
    
    评论 编辑记录

    报告相同问题?

    问题事件

    • 系统已结题 10月3日
    • 创建了问题 9月25日

    悬赏问题

    • ¥15 最小生成树问题 Prim算法和Kruskal算法
    • ¥25 医院住院病人呼叫器设计
    • ¥15 不想和现在的团队合作了,怎么避免他们对程序动手脚
    • ¥30 c++类和数组实验代码
    • ¥20 C语言字符串不区分大小写字典排序相关问题
    • ¥15 关于#python#的问题:我希望通过逆向技术爬取1688搜索页下滑加载的数据
    • ¥15 关于Linux的终端里,模拟实现一个带口令保护的屏保程序遇到的输入输出的问题!(语言-c语言)
    • ¥30 请问,这个嵌入式Linux系统怎么分析,crc检验区域在哪
    • ¥15 二分类改为多分类问题
    • ¥15 Unity微信小游戏上调用ReadPixels()方法报错