Skip to content

Instantly share code, notes, and snippets.

@jerryan999
Created January 3, 2020 09:32
Show Gist options
  • Save jerryan999/cc054450724c3714f11da3b2146d6193 to your computer and use it in GitHub Desktop.
Save jerryan999/cc054450724c3714f11da3b2146d6193 to your computer and use it in GitHub Desktop.
# encoding:utf-8
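'''
Usage examples:

1) Recruitment distribution of freight drivers in Beijing, broken down by administrative district:
python3 to_redis.py --city_name 北京 --position_name 货运司机 --include_district
2) Recruitment distribution of freight drivers in Beijing, city level only:
python3 to_redis.py --city_name 北京 --position_name 货运司机
'''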
import redis
import json
import pymongo
import pickle
import argparse
# Redis handle used by RedisLoader.to_redis: local server, default port, database 1.
redis_db = redis.Redis(
    host="localhost",
    port=6379,
    db=1,
)
# MongoDB collection handle: database "58", collection "PositionCount-new" on the
# default MongoClient() connection.
mongo_collection = pymongo.MongoClient()["58"]["PositionCount-new"]
class RedisLoader(object):
    """Store one pickled record per (city/district, position) combination in Redis.

    The position table and the city/district table are read from JSON files in
    the current working directory and narrowed according to the options in
    ``args`` before the records are written.
    """

    def __init__(self, args):
        """Load both JSON tables and apply the filters requested in ``args``.

        Args:
            args: Namespace-like object exposing ``city_name``,
                ``position_name`` and ``include_district``.

        Raises:
            OSError: If ``position.json`` or ``districts_city.json`` cannot be opened.
            ValueError: If either file does not contain valid JSON.
        """
        self.args = args
        # Position table. The encoding is explicit so the non-ASCII names decode
        # the same way on every platform.
        with open("position.json", encoding="utf-8") as f:
            self.positions = json.load(f)
        # City/district table.
        with open("districts_city.json", encoding="utf-8") as f:
            self.districts_city = json.load(f)
        self.preprocess()

    def preprocess(self):
        """Narrow the loaded tables by city, position and district granularity."""
        if self.args.city_name is not None:
            self._filter_city(self.args.city_name)
        if self.args.position_name is not None:
            self._filter_position(self.args.position_name)
        self._apply_district_filter()

    def _apply_district_filter(self):
        """Keep district-level or city-level rows depending on ``args.include_district``."""
        if self.args.include_district:
            self._include_chengqu()
        else:
            self._exclude_chengqu()

    def _filter_city(self, cityname):
        """Keep only the rows whose ``city_name`` equals ``cityname`` (e.g. 北京)."""
        self.districts_city = [
            city for city in self.districts_city if city['city_name'] == cityname
        ]

    def _exclude_chengqu(self):
        """Drop the per-district rows, keeping only rows with ``district_id == 0``."""
        self.districts_city = [
            city for city in self.districts_city if city['district_id'] == 0
        ]

    def _include_chengqu(self):
        """Keep only the per-district rows (``district_id != 0``).

        NOTE(review): the row with ``district_id == 0`` is dropped here, so the
        result holds the districts only; confirm that is the intended meaning
        of "include districts".
        """
        self.districts_city = [
            city for city in self.districts_city if city['district_id'] != 0
        ]

    def _filter_position(self, position_name):
        """Keep only the positions whose ``position`` equals ``position_name`` (e.g. 客运司机)."""
        self.positions = [
            position for position in self.positions
            if position['position'] == position_name
        ]

    def to_redis(self):
        """Write one pickled record per (city/district row, position) pair to Redis.

        Each record is the city/district row merged with the position row
        (position fields win on key clashes). It is stored under a key built
        from ``city_id``, ``district_id`` and ``p_path``.

        Original author's size estimate: with ``include_district`` set there are
        about 1.9 million keys (roughly 10 million requests at up to 5 pages
        each); without it about 200 thousand keys (roughly 1 million requests).

        Raises:
            KeyError: If a merged record lacks ``city_id``, ``district_id`` or ``p_path``.
        """
        # Use the options stored on the instance; the previous code read a
        # module-level ``args`` that only exists when the file runs as a script.
        self._apply_district_filter()
        for city in self.districts_city:
            for position in self.positions:
                # Merge into a fresh dict so the shared city row is not mutated
                # and fields of one position never leak into the next record.
                item = {**city, **position}
                key = "doumi-citycode:{},district_code:{},position_path:{}".format(
                    item['city_id'], item['district_id'], item['p_path']
                )
                redis_db.set(key, pickle.dumps(item))
if __name__ == '__main__':
    # Script entry point: parse the optional filters, echo them, then fill Redis.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--city_name",
        help="城市名,比如北京",
    )
    cli.add_argument(
        "--position_name",
        help="职位名,比如客运司机",
    )
    cli.add_argument(
        "--include_district",
        action='store_true',
        help="是否细分到城市内部的行政区",
    )
    # The parsed options stay bound to the module-level name `args`.
    args = cli.parse_args()
    print(args)
    RedisLoader(args).to_redis()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment