# -*- coding:utf-8 -*-
"""Industry-chain lookup tables and the Redis connection used by this script.

Every industry chain is described by two lists: the industry classification
codes that belong to the chain, and keywords looked for in a company's
business scope.
"""
import json
import os
import sys
import time

import jieba
import redis

# Put the project root on sys.path so sibling project modules can be imported.
root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(root_path)

# NOTE(review): the host, port and password committed in this file were
# redacted placeholders. Supply the real values through the environment.
# 6379 is only the stock Redis port, used as a fallback -- confirm the port
# of the actual deployment.
pool = redis.ConnectionPool(
    host=os.environ.get('REDIS_HOST', '47.xxx.xxx.xx'),
    port=int(os.environ.get('REDIS_PORT', '6379')),
    decode_responses=True,
    password=os.environ.get('REDIS_PASSWORD', 'password'),
)
r = redis.StrictRedis(connection_pool=pool)
start_time = time.time()

# Pharmaceutical industry chain: industry classification codes.
medicine = [
    '0171', '0179', '0251', '0252', '0392', '0399', '0411', '0412', '0421', '0422',
    '3543', '3544', '2780', '7340', '7512', '7520', '7530', '7540', '7590', '2710',
    '2720', '2730', '2740', '2761', '2762', '2770', '3581', '3582', '3583', '3584',
    '3585', '3586', '3587', '3589', '1491', '1492', '2666', '2683', '2665', '2915',
    '5151', '5152', '5154', '5126', '5134', '5142', '5193', '5251', '5252', '5254',
    '5255', '5225', '5234', '5236', '5242', '5211', '5212', '5219', '5292', '7115',
    '7121', '7122', '5960', '5990', '6432', '6434', '8411', '8412', '8413', '8414',
    '8415', '8425', '8432', '8433', '8416', '8515', '8512', '8513', '8522', '8434',
    '8492', '8499', '7244', '7451', '8435', '8436', '8491', '8992', '8930', '8053',
    '8514', '8521', '8010', '8090',
]
# Pharmaceutical industry chain: business-scope keywords.
med_words = [
    '医药', '药材', '中药材', '制药', '药用', '医学', '医疗', '中药',
    '药品', '医用', '消毒', '口腔', '康复', '外科', '内科', '西药',
    '营养', '诊断', '兽医', '护理', '医疗器械', '医药用品', '保健', '健康',
    '防护', '口腔', '保健品', '生育', '工商', '医院', '临床', '中西医',
    '疗养', '精神', '急救', '疾病', '制药', '供血', '养生', '妇幼',
    '残疾', '残疾人', '老年人', '急救', '体检', '防治', '疫苗', '注射',
    '计生', '药业', '胶囊', '片剂', '特殊输液', '麻醉类', '手术', '抗肿瘤',
    '医学中间体', '生物医学', '口腔义齿', '内窥镜', '医护人员', '中药饮片', '药制剂', '抗生素',
    '防护用品', '消毒产品', '健康技术', '医疗卫生', '诊所',
]

# Modern finance industry chain: industry classification codes.
financial = [
    '6610', '6650', '6750', '6870', '6940', '6513', '7271', '6550', '7295', '6621',
    '6622', '6623', '6624', '6631', '6632', '6633', '6634', '6635', '6636', '6637',
    '6639', '6640', '6711', '6712', '6720', '6731', '6732', '6739', '6741', '6749',
    '6760', '6790', '6811', '6812', '6813', '6814', '6820', '6830', '6840', '6851',
    '6852', '6853', '6860', '6890', '6991', '6999', '6911', '6919', '6920', '6930',
    '6950',
]
# Modern finance industry chain: business-scope keywords.
fin_words = [
    '银行', '证券', '保险', '期货', '金融', '信用', '货币', '资金',
    '融资', '贷款', '借贷', '理财', '基金', '投资', '资本', '信托',
    '资产', '财产', '控股', '律师', '支付', '人民币', '存款', '长期贷款',
    '票据', '金融债券', '外汇', '理财产品', '中国保监会', '款项',
]

# Science and technology services industry chain: industry classification codes.
science = [
    '4014', '4015', '3581', '6311', '6312', '6319', '6321', '6322', '6331', '6550',
    '7310', '7320', '7330', '7340', '7350', '6621', '6622', '6631', '6635', '6640',
    '6760', '6820', '6853', '6860', '6890', '6911', '6930', '6950', '6410', '6440',
    '6450', '6490', '6513', '6531', '6532', '6540', '6560', '6520', '6599', '7483',
    '7484', '7491', '7492', '6621', '6622', '6631', '6635', '6640', '6760', '6820',
    '6853', '6860', '6890', '6911', '6930', '6950', '6410', '6440', '6450', '6490',
    '6513', '6531', '6532', '6540', '6560', '6520', '6599', '7259', '7251', '7281',
    '6429', '6431', '6432', '6433', '7520', '7231', '7232', '7239',
]
# Science and technology services industry chain: business-scope keywords.
sci_words = [
    '科技', '电信', '试验机', '传输', '卫星', '研发', '科学', '新能源',
    '遥感', '测绘', '集成', '电路', '互联网', '分析仪器', '恒温', '干燥箱',
    '离心机', '培养箱', '科学实验', '科学研究', '新材料',
]

# High-end equipment manufacturing industry chain: industry classification codes.
equipment = [
    '7320', '6531', '6550', '7491', '3444', '3445', '3446', '3451', '3453', '3484',
    '3489', '3499', '3813', '3821', '3421', '3422', '3423', '3424', '3425', '3429',
    '4011', '4014', '4015', '4016', '4019', '4029', '3744', '3812', '3921', '3922',
    '3412', '3453', '3716', '3811', '3821', '3891', '3899', '2641', '3532', '3575',
    '3592', '3791', '3792', '3811', '3824', '3921', '3491', '3492', '3493', '3964',
    '3511', '3512', '3513', '3515', '3516', '3521', '3522', '3523', '3524', '3531',
    '3542', '3551', '3569', '3571', '3599', '3360', '3399', '3432', '3433', '3434',
    '3439', '3499', '3531', '3532', '3572', '3579', '3741', '3749', '3742', '3743',
    '3921', '3962', '4023', '3711', '3712', '3714', '3716', '3720', '3424', '3737',
    '3513', '7452', '7520', '7251', '7259', '7281', '7289', '5179', '5164', '7224',
    '7112', '7119', '7296', '4320', '4330', '4360', '4343', '6331', '6339', '6571',
    '7441', '4341', '5331', '5333', '4831', '4832', '7231', '7439', '7481', '7484',
]
# High-end equipment manufacturing industry chain: business-scope keywords.
equ_words = [
    '高端', '装备', '智能', '航空', '通信', '轨道', '交通', '铁路',
    '海洋', '潜水', '水下', '救捞', '机器人', '石油', '钻探', '冶金',
    '起重机', '航空器', '飞机', '航天', '高铁', '交通', '深海', '遥感',
    '直升机', '航空航天', '民航飞行器', '航空装备', '机场', '航站楼', '消防车', '船用',
    '航空运输', '航空器材', '高温合金', '液压', '精密', '精密机械', '有轨电车', '涂料',
    '航标', '船坞', '海上',
]
# New-generation information technology industry chain: industry
# classification codes. Repeated codes are kept exactly as listed.
information = [
    '3562', '3563', '3569', '3831', '3832', '3971', '3972', '3974', '3975', '3976',
    '3979', '3981', '3982', '3983', '3984', '3989', '3912', '3913', '3919', '3921',
    '3932', '3931', '3911', '3914', '3919', '3922', '4023', '3940', '3990', '3915',
    '4028', '2651', '2669', '2921', '3042', '3051', '3073', '3091', '3841', '3842',
    '3849', '3985', '3973', '6311', '6312', '6319', '6410', '6440', '6513', '6531',
    '6532', '6560', '6431', '6450', '6433', '6434', '6490', '6450', '6421', '6429',
    '6550', '5176', '5211', '5273', '5274', '5292', '7281', '7289', '6540', '8121',
    '4390', '4910', '8122',
]
# New-generation information technology industry chain: business-scope
# keywords. Repeated keywords are kept exactly as listed.
inf_words = [
    '信息', '元器件', '半导体', '电子', '电缆', '光纤', '元件', '电路',
    '电阻', '电容', '雷达', '锂电子', '锂电池', '锂电', '镍氢', '电池',
    '通讯', '二极管', '电位器', '电感器', '光电', '晶体管', '液晶显示', 'IC',
    '硬盘', 'CPU', '导航', '制导', '雷达', '工控机', '石墨',
]
# Artificial intelligence industry chain: industry classification codes.
# Repeated codes are kept exactly as listed.
artificial = [
    '7320', '6519', '7491', '6450', '6550', '6520', '3562', '3973', '3824', '3979',
    '4023', '3581', '3563', '3922', '3976', '3983', '4013', '4021', '4022', '4023',
    '4024', '4025', '4026', '4027', '4028', '4029', '3484', '3489', '6511', '6512',
    '6513', '3964', '3492', '3963', '3969', '3491', '3961', '3969', '3990', '3483',
    '3499', '3511', '3531', '3551', '3561', '3423', '3434', '3439', '3446', '3464',
    '3572', '3823', '3874', '3913', '3919', '3921', '3922', '3962', '4014', '4029',
    '3914', '3915', '6531', '6431', '6433', '6434', '6490', '6532', '0519', '6319',
    '6540', '4320', '4330', '4415', '4416', '4420', '4910', '8132', '6631', '5179',
    '5136', '5137', '5154', '5171', '5172', '5175', '5177', '5193', '5282', '5283',
    '5292', '5211', '5273', '5279', '5254', '7281',
]
# Artificial intelligence industry chain: business-scope keywords.
art_words = [
    '人工智能', '算法', '智能化', '云平台', '芯片', 'AI', '可编程', '单晶硅',
    '机器人', '自动控制系统', '智能', '大数据', '无人机', '计算机', '物联网', '互联网',
]
# Cultural and creative industry chain: industry classification codes.
culture = [
    '7212', '7211', '7221', '9051', '9053', '9059', '8070', '5181', '7298', '3542',
    '3474', '3931', '3932', '3933', '3934', '3939', '3471', '3953', '3963', '3472',
    '3473', '3873', '2461', '2462', '2469', '2221', '2222', '2642', '2644', '2664',
    '8730', '8770', '8710', '8720', '8740', '8810', '8870', '8890', '2431', '2432',
    '2433', '2434', '2435', '2436', '2437', '2438', '2439', '3075', '3076', '2411',
    '2412', '2414', '2459', '2672', '3951', '3952', '3961', '3969', '2421', '2422',
    '2423', '2429', '8750', '8760', '8820', '8393', '8399', '8831', '8832', '8840',
    '8850', '9011', '9012', '9013', '9019', '9090', '9030', '5622', '8621', '8623',
    '8624', '8625', '8626', '8629', '7520', '7284', '6572', '6422', '6319', '6579',
    '6432', '2311', '2312', '2319', '2320', '2330', '8060', '7251', '7259', '7484',
    '7491', '7492', '7485', '8610', '8622', '6421', '6429', '5143', '5144', '5145',
    '5243', '5244', '7124', '7125', '5164', '5245', '5246', '5183', '5184', '5175',
    '5178', '5248', '5147', '5247', '5141', '5241', '5137', '5271', '5149', '5249',
    '7121', '7123',
]
# Cultural and creative industry chain: business-scope keywords.
cul_words = [
    '文化经纪', '体育赛事', '旅游', '文化', '艺术交流', '体育表演', '影视文化',
    '雕塑', '字画', '工艺礼品', '广播', '电影院线', '影城', '收藏品',
    '舞台艺术', '珠宝', '玉器', '陶瓷', '刺绣', '电子乐器', '乐器',
    '影片', '音像制品', '艺术', '印刷', '喷绘', '广告设计', '新闻',
]
# International commerce and trade industry chain: industry classification
# codes. Repeated codes are kept exactly as listed.
commerce = [
    '7010', '7020', '6513', '6550', '7211', '7212', '7213', '7221', '7222', '7223',
    '7224', '7231', '7232', '7241', '7242', '7243', '7244', '7245', '7246', '7251',
    '7259', '7262', '7264', '7271', '7272', '7281', '7282', '7283', '7284', '7289',
    '7291', '7293', '7294', '7296', '7297', '7298', '5181', '5182', '5183', '5184',
    '5189', '5171', '5172', '5173', '5174', '5175', '5176', '5177', '5178', '5179',
    '5161', '5162', '5163', '5164', '5165', '5151', '5152', '5153', '5154', '5141',
    '5142', '5143', '5144', '5145', '5146', '5147', '5131', '5132', '5133', '5134',
    '5135', '5136', '5137', '5138', '5121', '5122', '5123', '5124', '5125', '5126',
    '5127', '5211', '5212', '5213', '5221', '5222', '5223', '5224', '5225', '5226',
    '5231', '5232', '5233', '5234', '5235', '5236', '5237', '5238', '5241', '5242',
    '5243', '5244', '5245', '5246', '5247', '5248', '5251', '5252', '5253', '5254',
    '5255', '5261', '5262', '5263', '5264', '5265', '5271', '5272', '5273', '5274',
    '5281', '5282', '5283', '5284', '5285', '5286', '5287', '5292', '5293', '5294',
    '5521', '5522', '5523', '5532', '5320', '5432', '5521', '5522', '5523', '5612',
    '5930', '5810', '5821', '7451', '7452', '7454', '7455', '7459',
]
# International commerce and trade industry chain: business-scope keywords.
# Repeated keywords are kept exactly as listed.
# NOTE(review): '政府部门购物中心' looks like two keywords ('政府部门' and
# '购物中心') that lost the comma between them -- confirm before splitting.
comm_words = [
    '房地产', '物业', '法律', '供应链', '房屋', '物业', '住房', '餐饮服务',
    '保健服务', '家政服务', '健康咨询服务', '政府部门购物中心', '零售', '商务', '贸易', '销售',
    '日用百货', '零配件', '电子产品', '批发', '卫生用品', '灶具', '厨具', '餐具',
    '批发', '灯具', '食用油', '肉', '禽', '蛋', '水产品', '调味品',
    '酒', '汽车', '零配件', '建筑装潢材料', '金属材料', '化工原料', '日用品', '五金',
    '化妆品', '销售', '电线电缆', '茶叶', '纺织品', '旧车零售', '音响设备', '仓储',
    '配送', '货物运输', '冷藏车', '运输',
]
# Industry classification codes that are matched directly, without any
# keyword list of their own.
industry = [
    '8366', '8341', '8342',
    '8391', '8392', '8336',
]
def demo(min_keyword_hits=0):
    """Copy company records that belong to a known industry chain to Redis.

    Reads every entry of the Redis list ``industry_code:mhCompany2:``. Each
    entry is evaluated into a Python object whose element at index 1 is the
    company dict; that dict must contain ``industryCode`` and ``scope``.
    The digits of ``industryCode`` form the classification code. The first
    industry chain whose code list contains that code decides the record;
    later chains are not consulted, even when the code appears in them too.
    The company dict gets ``result = "1"`` and is pushed, as its ``str()``
    text, onto ``industry_code:zhang:``.

    Args:
        min_keyword_hits: Minimum number of the chain's keywords that must
            occur in the business scope for the record to be pushed. The
            default of 0 reproduces the original ``a >= 0`` check, which is
            always true, so by default the keyword lists never filter
            anything. Pass 1 to require at least one keyword.
            NOTE(review): the original check was most likely meant to be
            ``> 0`` -- confirm with the data owner before changing the
            default.

    Raises:
        KeyError: if a company dict lacks ``industryCode`` or ``scope``.
    """
    # eval() below resolves bare names through this function's local scope.
    # Stored records may contain a JSON-style `null`; keeping this local
    # makes such a field evaluate to " " instead of raising NameError.
    null = " "

    # (codes, keywords) per industry chain, in the original priority order.
    chains = (
        (medicine, med_words),
        (financial, fin_words),
        (science, sci_words),
        (equipment, equ_words),
        (information, inf_words),
        (artificial, art_words),
        (culture, cul_words),
        (commerce, comm_words),
    )

    for raw_record in r.lrange("industry_code:mhCompany2:", 0, -1):
        # SECURITY NOTE(review): eval() executes whatever text is stored in
        # Redis. It is kept because the stored format is not visible here
        # (it relies on `null` above); switch to json.loads or
        # ast.literal_eval once the format is confirmed.
        record = eval(raw_record)
        company = record[1]
        coding = ''.join(filter(str.isdigit, company['industryCode']))
        print(coding)
        business = company["scope"]
        company["result"] = "1"
        print(company)

        for codes, keywords in chains:
            if coding not in codes:
                continue
            hits = sum(1 for word in keywords if word in business)
            if hits >= min_keyword_hits:
                _push_company(company)
            break
        else:
            if coding in industry:
                # Directly matched codes need no keyword confirmation.
                _push_company(company)
            else:
                print("不属于任何一个行业")


def _push_company(company):
    """Push one company dict onto the result list as its ``str()`` text."""
    # redis-py 3.0+ raises DataError for dict values; str() yields the same
    # text that redis-py 2.x stored when it was handed the dict directly.
    # NOTE(review): LPUSHX is a no-op when the target key does not exist yet,
    # so results are silently dropped until something creates the list --
    # confirm that LPUSH was not intended.
    r.lpushx("industry_code:zhang:", str(company))
if __name__ == "__main__":
    # Sample inputs for a single company, kept for reference only:
    #   industry classification code -> coding = '0171'
    #   business scope               -> business = '恒瑞医药'
    demo()