Created
January 3, 2020 09:32
-
-
Save jerryan999/cc054450724c3714f11da3b2146d6193 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding:utf-8 | |
'''
1) 只看北京地区(包括各行政区)的货运司机的招聘分布情况
python3 to_redis.py --city_name 北京 --position_name 货运司机 --include_district
2) 只看北京的货运司机的招聘分布情况
python3 to_redis.py --city_name 北京 --position_name 货运司机
'''
import redis | |
import json | |
import pymongo | |
import pickle | |
import argparse | |
# Redis client used by RedisLoader.to_redis to store the pickled items
# (local server, logical database 1).
redis_db = redis.Redis(
    host="localhost",
    port=6379,
    db=1,
)

# MongoDB collection handle on a default MongoClient(); it is not referenced
# anywhere else in the visible part of this file.
_mongo_client = pymongo.MongoClient()
mongo_collection = _mongo_client['58']['PositionCount-new']
class RedisLoader(object):
    """Combine city/district records with position records and store them in Redis.

    ``position.json`` and ``districts_city.json`` are read from the current
    working directory, narrowed down according to the given options, and every
    remaining (city entry, position) pair is written to Redis as a pickled dict.
    """

    def __init__(self, args):
        """Read both JSON files and apply the filters selected in ``args``.

        Args:
            args: Object exposing ``city_name``, ``position_name`` and
                ``include_district`` attributes (for example the
                ``argparse.Namespace`` built by the script entry point).
        """
        self.args = args
        # Position records. The data contains non-ASCII text, so the encoding
        # is fixed instead of relying on the platform default.
        with open("position.json", encoding="utf-8") as f:
            self.positions = json.load(f)
        # City / district records.
        with open("districts_city.json", encoding="utf-8") as f:
            self.districts_city = json.load(f)
        self.preprocess()

    def preprocess(self):
        """Apply the optional city and position filters, then the district filter."""
        if self.args.city_name is not None:
            self._filter_city(self.args.city_name)
        if self.args.position_name is not None:
            self._filter_position(self.args.position_name)
        self._apply_district_filter()

    def _apply_district_filter(self):
        """Select district-level or non-district entries based on ``include_district``."""
        if self.args.include_district:
            self._include_chengqu()
        else:
            self._exclude_chengqu()

    def _filter_city(self, cityname):
        """Keep only the entries whose ``city_name`` equals ``cityname`` (e.g. 北京)."""
        self.districts_city = [
            city for city in self.districts_city if city['city_name'] == cityname
        ]

    def _exclude_chengqu(self):
        """Keep only the entries whose ``district_id`` is 0.

        NOTE(review): ``district_id == 0`` presumably marks the city-wide
        entry (no district breakdown) - confirm against the data file.
        """
        self.districts_city = [
            city for city in self.districts_city if city['district_id'] == 0
        ]

    def _include_chengqu(self):
        """Keep only the entries whose ``district_id`` is not 0 (district-level rows)."""
        self.districts_city = [
            city for city in self.districts_city if city['district_id'] != 0
        ]

    def _filter_position(self, position_name):
        """Keep only the positions whose ``position`` equals ``position_name`` (e.g. 客运司机)."""
        self.positions = [
            position for position in self.positions
            if position['position'] == position_name
        ]

    def to_redis(self):
        """Write one pickled item per (city entry, position) pair to Redis.

        Each item is the city entry's fields merged with the position's fields
        (position fields win on a key clash). The key is built from the item's
        ``city_id``, ``district_id`` and ``p_path``.

        The original author's sizing note: with districts included there are
        about 1.9 million keys (roughly 10 million requests at up to 5 pages
        each); without districts about 200 thousand keys (roughly 1 million
        requests).
        """
        # Use the options stored on the instance rather than a module-level
        # ``args`` global, so the class also works when imported elsewhere.
        self._apply_district_filter()
        for city in self.districts_city:
            for position in self.positions:
                # Merge into a fresh dict: updating ``city`` in place would
                # leak position fields into the shared city record and into
                # the items built for later positions.
                item = dict(city)
                item.update(position)
                key = "doumi-citycode:{},district_code:{},position_path:{}".format(
                    item['city_id'], item['district_id'], item['p_path'])
                redis_db.set(key, pickle.dumps(item))
if __name__ == '__main__':
    # Command-line entry point: parse the three options, echo them, then
    # build the loader and push everything to Redis.
    cli = argparse.ArgumentParser()
    for flag, options in (
        ("--city_name", {"help": "城市名,比如北京"}),
        ("--position_name", {"help": "职位名,比如客运司机"}),
        ("--include_district", {"action": 'store_true', "help": "是否细分到城市内部的行政区"}),
    ):
        cli.add_argument(flag, **options)
    # ``args`` stays a module-level name on purpose.
    args = cli.parse_args()
    print(args)
    RedisLoader(args).to_redis()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment