8-09 3,375 views
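import requests  # HTTP requests
import threading  # multithreading
from bs4 import BeautifulSoup  # HTML element extraction, similar to jQuery
import datetime  # date/time library
import hashlib  # hashing library, used to compute file hashes so the same file is not downloaded twice
import os,sys,os.path  # system I/O libraries
import random  # random-number library
import re  # regular-expression library
import urllib.request  # URL request library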
"爬虫所需相关重要技术"
@1.RQ: python的简单任务队列
@2.scrapy-redis: python redis 分布式
相关学习链接:
scrapy-redis:
https://www.zhihu.com/question/32302268/answer/55724369
http://www.xuebuyuan.com/2185136.html
https://github.com/rolando/scrapy-redis
http://scrapy-chs.readthedocs.io/zh_CN/latest/intro/overview.html
rq:
http://hao.jobbole.com/python-rq/
https://github.com/nvie/rq
知乎:
如何入门 Python 爬虫? : https://www.zhihu.com/question/20899988
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pymysql
import time
class MySQL:
    """Thin convenience wrapper around one pymysql connection and one cursor.

    The connection is opened in ``__init__`` from a config mapping, and every
    statement runs on the single cursor created there.  ``insert`` and
    ``update`` commit after executing; ``query`` does not commit.
    """

    error_code = ''  # first element of the last exception swallowed by update()
    _instance = None  # set to the pymysql module by __init__
    _conn = None  # database connection
    _cur = None  # cursor
    _TIMEOUT = 30  # default timeout of 30 seconds (not referenced in this class)
    _timecount = 0  # not referenced in this class

    def __init__(self, dbconfig):
        """Open the connection and create the cursor.

        Args:
            dbconfig: mapping with the keys 'host', 'port', 'user', 'passwd',
                'db' and 'charset', passed through to ``pymysql.connect``.
        """
        self._conn = pymysql.connect(host=dbconfig['host'],
                                     port=dbconfig['port'],
                                     user=dbconfig['user'],
                                     passwd=dbconfig['passwd'],
                                     db=dbconfig['db'],
                                     charset=dbconfig['charset'])
        self._cur = self._conn.cursor()
        self._instance = pymysql

    def _execute(self, sql, params=''):
        """Run ``sql`` on the cursor and return whatever ``execute`` returns.

        ``params`` is forwarded to the cursor only when it is not the empty
        string, which is the "no parameters" sentinel of the public methods.
        """
        # NOTE(review): this forces utf8 for every statement regardless of the
        # charset given in dbconfig -- confirm that this is intended.
        self._cur.execute("SET NAMES utf8")
        if params != '':
            return self._cur.execute(sql, params)
        return self._cur.execute(sql)

    # The second parameter of query/update/insert is named ``str`` (shadowing
    # the builtin); the name is kept so existing keyword callers keep working.

    def query(self, sql, str=''):
        """Execute ``sql`` without committing and return the cursor's result.

        Use ``fetchAllRows`` / ``fetchOneRow`` afterwards to read the rows.
        """
        return self._execute(sql, str)

    def update(self, sql, str=''):
        """Execute ``sql``, commit, and return the value from ``execute``.

        Failures are handled on a best-effort basis: the exception is not
        propagated, its first argument is stored in ``error_code``, a message
        is printed and 0 is returned.
        """
        try:
            affected = self._execute(sql, str)
            # Commit before returning; otherwise the change is never persisted.
            self._conn.commit()
        except Exception as exc:
            self.error_code = exc.args[0] if exc.args else ''
            print("发生异常")
            return 0
        return affected

    def insert(self, sql, str=''):
        """Execute ``sql``, commit, and return the cursor's ``lastrowid``.

        Exceptions from the cursor or the commit propagate to the caller.
        """
        self._execute(sql, str)
        self._conn.commit()
        return self._cur.lastrowid

    def fetchAllRows(self):
        """Return all remaining rows of the last executed statement."""
        return self._cur.fetchall()

    def fetchOneRow(self):
        """Return the next row of the last executed statement."""
        return self._cur.fetchone()

    def getRowCount(self):
        """Return the cursor's ``rowcount``."""
        return self._cur.rowcount

    def commit(self):
        """Commit the current transaction."""
        self._conn.commit()

    def rollback(self):
        """Roll back the current transaction."""
        self._conn.rollback()

    def close(self):
        """Close the cursor and the connection; never raises.

        Each resource is closed independently so that a failing cursor close
        cannot leave the connection open.  Calling this more than once is
        harmless because errors from already-closed resources are ignored.
        """
        for resource in (self._cur, self._conn):
            if resource is None:
                # __init__ did not get far enough to create this resource.
                continue
            try:
                resource.close()
            except Exception:
                # Best-effort cleanup: there is nothing useful to do here.
                pass

    def __del__(self):
        """Release the database resources when the object is collected."""
        self.close()
# if __name__ == '__main__':
# # 数据库连接参数
# dbconfig = {'host':'',
# 'port': ,
# 'user':'',
# 'passwd':'',
# 'db':'',
# 'charset':'utf8'}
#
# # # 连接数据库,创建这个类的实例
# # db = MySQL(dbconfig)
# #
# # # 操作数据库
# # sql = "insert into cmf_gifs(name) values('123')"
# # # sql2 = "UPDATE lbs_member SET username='123' WHERE userid=123"
# # # sql3 = "SELECT * FROM lbs_member LIMIT 5"
# # # db.query(sql)
# # # result = db.fetchAllRows()
# # # result = db.update(sql2)
# # result = db.insert(sql)
# # print(result)
# # db.close()