70 files changed, 12550 insertions, 0 deletions
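This commit adds a distributed crawler for the Last.fm friend graph. lastfm.py walks user.getFriends through the Last.fm web-service API, process.py works through one chunk of usernames on a remote host, manager.py schedules chunks across the PlanetLab nodes in nodes.txt (rotating through the API keys in keys.txt), and the Makefile deploys the client and its libraries to the responsive nodes with pssh/psshscp. The requests/ tree is a vendored copy of requests 0.10.0, so the nodes need nothing beyond what is shipped. A minimal local run of the client, assuming simplejson is importable and using a placeholder API key and seed user:

    import lastfm

    lastfm.init("0123456789abcdef0123456789abcdef")  # placeholder key, not a real one
    graph = lastfm.build_graph("someuser.txt")       # resume state from an earlier crawl, if any
    # process=False only enumerates users missing from the graph and writes them
    # out in 100-name chunk files (0.txt, 1.txt, ...) for manager.py to farm out.
    lastfm.bfs(graph, "someuser", process=False)

From there, make alive followed by make deploy pushes everything out, and manager.py drives each chunk through its upload, process, and download stages.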
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e58b9b0 --- /dev/null +++ b/Makefile @@ -0,0 +1,34 @@ +HOSTS=nodes.txt +ALIVES=alive.txt +USER=irisaple_pacemaker2 +SSH_CONFIG=ssh_config +OPTIONS=-x "-F $(SSH_CONFIG)" +FOPTIONS=$(OPTIONS) -h $(ALIVES) + +.PHONY: alive deploy clean pink nuke + +$(ALIVES) alive: + pssh $(OPTIONS) -h $(HOSTS) "uname" -i | grep "\[SUCCESS\]"\ + | awk '{print $$4}' > $(ALIVES) + +deploy_libs: requests simplejson + +deploy_client: lastfm.py process.py + +deploy_client deploy_libs: $(ALIVES) + psshscp $(FOPTIONS) -r $(filter-out $(ALIVES), $+) /home/$(USER) + touch $@ + +deploy: deploy_client deploy_libs + +clean: + pssh $(OPTIONS) -h $(HOSTS) "rm -f *.txt; rm -f *.out" + +distclean: + rm -f alive.txt deploy_client deploy_libs + +ping: $(ALIVES) + pssh $(FOPTIONS) "uname" + +nuke: + pnuke $(OPTIONS) -h $(HOSTS) "python" diff --git a/keys.txt b/keys.txt new file mode 100644 index 0000000..a34bd6b --- /dev/null +++ b/keys.txt @@ -0,0 +1,62 @@ +552a615b6dcbe5b5e1c215c6d50b8028 +f0f0e21e86c64377fd94d57c055f5d35 +64d9735add9295ae2a280e27ded3cf2b +5db42e8742db44aa2a62ecbe5c61617b +9c3020ea217f9d05e700d3c9a666dc2b +f3d17e38b2d25e379e5ce3e13ae76807 +f5ebee8e871bec4d24fea0392f480f08 +b87e1776746c277d813c579129ad4fa1 +ffeb2842a9e1d8323bb98f2f9e10984d +d60c7308aa49bc9243faa25e352efa20 +d5a32b9b16a574885f3ce86267835b9a +f99f797f69b0b8223fce3abf8ddf6f73 +d526dfa7fca6ac5938f6cae14a17c6fe +f3224d8f7e6446aacf85305f0f26d9c0 +49451fb92f801e0a1e51a78acd2d97b5 +6643fffb38b8e891a2efc5fd1902121f +0e963bec546b66cfdbcd982e73fd5efa +54f5d47acca3d4634dbfa183107a10ef +fe61928397d7b813afe82f5bf4cc161e +8b9d3da5de0f4812f5761110f87164c0 +3fefe4a66e66a3bd697f71d1265ba75a +f61c5f84ef46b6e06702ccf1470e1fee +ba6d88f1da35d3171f6121ad77c8efcb +f7eb7053872ecc52e3637d4bbb5bf30c +3110511c1e321d7adb3c6b3ecc3cd4ff +f9cec96e9082aad3474ce9fa9e637655 +b8b209310bd13140e1818ab45a792f20 +11e8e6da2265c67e14735a1bfeb16789 +6099aee46b6335c28718037bd6c95cf0 +8cf5daa654c8dd1c07c7ae02f6a689e8 +9278f252be901457a2da37df008604dc +eb8dec752cb7de70dd29a2380ddc7448 +cac095ebdd6031c593556509fbb2fd49 +2d5b7248209e96caec467c98746971ab +a3325bfce4164736b735bb698d19d818 +7db11c62b8f3e7cc67cf01456414a5a8 +b6d8d4cac67a0630f881708882c486e5 +c900f92fe7295c0046ce7ddffe7a6e26 +6f1e3426e59edc21e8e6f039c031c529 +47d52fdbf769abd95f581d6db96cef79 +bd04715018eb17e2c01e73c1fc1388df +2d2ac61b77f0fc4dca07f001a0e0f4c0 +fc5a7e27d9f9c4be3d8b928329b71ca4 +1db82dc8b26d013fb6446cd691660e6c +ce15d8d85cd11a392925207c5b57837c +98933635486708fc1d7be3f21411b37a +408a9e0d5190600984f59a4fcea0954c +94807fa3aa169c7dcb12e690b4a3836c +08c5e674984d5e630ac8730cea68dd89 +fd2ab62e028810be909ef392e7453ced +40ab3d03472c53f19e410c46835dcffb +3ebda1b404add0ccf3556f581a702d8f +dc4cfc46c96f97e24dc8ffeaa5f32f98 +09cb073743b9b5b09924279d51a1e649 +a86dcaa6d953c313106de897ee897de3 +c4058f3a02e4bac530b4de182a8b5f19 +bc377c0ea32855452faa0c007e11ca2e +ac94506bed1b78e60ff7225f5526855b +021af7217b6ba8b3bfa22fd47174473c +723b9748ef341825d3843dfef3f003ee +88dd64077a673223915be37285693e4f +0548b5176eb00f6efc4e1f7133d7e202 diff --git a/lastfm.py b/lastfm.py new file mode 100644 index 0000000..60781d5 --- /dev/null +++ b/lastfm.py @@ -0,0 +1,141 @@ +from __future__ import with_statement +import requests + +import sys +import simplejson as json +from time import sleep +from collections import deque +from types import * + +URL = "http://ws.audioscrobbler.com/2.0/" + +def init(api): + global API_KEY + API_KEY = api + +def make_request(method, payload): + params = 
{"api_key": API_KEY, "format": "json", + "method": method} + user_agent = {'User-agent': 'Mozilla/5.0'} + params.update(payload) + try: + r = requests.get(URL, params=params, headers=user_agent) + except requests.exceptions.ConnectionError: + sleep(30) + r = requests.get(URL, params=params, headers=user_agent) + + try: + answer = json.loads(r.text) + except ValueError: # request failed for some reason, retrying + i = 0 + while r.status_code == 503 and i < 3: + sleep(0.3) + r = requests.get(URL, params=params, headers=user_agent) + i += 1 + try: + answer = json.loads(r.text) + except ValueError: # giving up + answer = None + + return answer + +def get_user_info(user): + try: + return make_request("user.getInfo", {"user": user})["user"] + except KeyError: + exit("Could not find user " + user) + +def get_friends(user): + r = make_request("user.getFriends", {"user": user, "recenttracks": "0"}) + if not r: + print "Unable to get user " + user + return + yield + try: + friends = r["friends"] + n_friends = int(friends["@attr"]["total"]) + n_pages = int(friends["@attr"]["totalPages"]) + friends = friends["user"] + except KeyError: + print "Problem with user " + user + return + yield + if type(friends) is dict: + friends = [friends] + for u in friends: + yield u + for page in xrange(2, n_pages+1): + sleep(0.1) + r = make_request("user.getFriends", {"user": user, + "recenttracks": "0", + "page": page}) + if not r: + continue + try: + f = r["friends"]["user"] + except KeyError: + print r + continue + if type(f) is dict: + f = [f] + for us in f: + yield us + +def build_graph(filename): + result = {} + try: + with open(filename) as f: + for line in f: + values = line.strip().split("\t") + result[values[0]] = values[1:] + except IOError: + pass + print len(result) + return result + +def print_set(s): + i = 0 + file = None + for item in s: + if i % 100 == 0: + if file: + file.close() + file = open(str(i/100) + ".txt", "w") + file.write(str(item) + "\n") + i += 1 + if file: + file.close() + +def bfs(graph, seed, process=True): + queue = deque([seed]) + visited = set([seed]) + to_do = set([]) + with open(seed + ".txt", "a") as file: + i = 0 + while queue: + i += 1 + if i % 10 == 0: + print "Visited: {0}, Queued: {1}".format(i, len(queue)) + c_node = queue.popleft() + try: + friends = graph[c_node] + except KeyError: + if not process: + to_do.add(c_node) + continue + friends = get_friends(c_node) + friends = [friend["name"] for friend in get_friends(c_node)] + sleep(0.1) + new = set(friends) - visited + visited |= new + queue.extend(new) + if c_node not in graph: + file.write(c_node + "\t" + "\t".join(friends) + "\n") + if not process: + print_set(to_do) + +if __name__ == "__main__": + seed = sys.argv[1] + process = False if len(sys.argv) >=3 else True + graph = build_graph(seed + ".txt") + bfs(graph, seed, process) diff --git a/manager.py b/manager.py new file mode 100644 index 0000000..2221512 --- /dev/null +++ b/manager.py @@ -0,0 +1,54 @@ +from subprocess import Popen +from os.path import join +from time import sleep +from glob import glob +from collections import deque + +SSH_CONFIG = "ssh_config" +to_do = set(glob("*.txt")) - set(["alive.txt", "keys.txt", "nodes.txt"]) +done = set(f[:-4] for f in glob("*.txt.out")) +to_do = deque(to_do - done) +apis = deque(line.strip() for line in open("keys.txt")) +servers = deque(line.strip() for line in open("alive.txt")) +processing = [] + +while to_do or processing: + to_do.extend(task["file"] for task in processing + if task["status"] == "collect") + processing 
= [task for task in processing if task["status"].endswith("ing")] + + if servers and to_do: + f = to_do.pop() + host = servers.popleft() + api = apis.pop() + apis.appendleft(api) + task = {"host": host, "file": f, "status": "uploading", "api": api, + "process": Popen(["scp", "-F", SSH_CONFIG, f, + host + ":"])} + processing.append(task) + + for task in processing: + rc = task["process"].poll() + if rc is None: + continue + elif rc != 0: + servers.append(task["host"]) + task["status"] = "collect" + continue + + if task["status"] == "uploading": + task["status"] = "processing" + task["process"] = Popen(["ssh", "-F", SSH_CONFIG, + task["host"], + "python2 process.py " + task["api"] + " " + + task["file"]]) + elif task["status"] == "processing": + task["status"] ="downloading" + task["process"] = Popen(["scp", "-F", SSH_CONFIG, + task["host"] + ":" + task["file"] + ".out", + "./"]) + elif task["status"] == "downloading": + task["status"] = "done" + servers.appendleft(task["host"]) + + sleep(0.5) diff --git a/nodes.txt b/nodes.txt new file mode 100644 index 0000000..2208d94 --- /dev/null +++ b/nodes.txt @@ -0,0 +1,394 @@ +planetlab-01.naist.jp +planetlab1.nileu.edu.eg +charon.cs.binghamton.edu +peeramidion.irisa.fr +planetlab01.erin.utoronto.ca +pl1.uni-rostock.de +pl1.bit.uoit.ca +planetlab-1a.ics.uci.edu +node1.planetlab.etl.luc.edu +planetlab02.erin.utoronto.ca +planetlab3.cs.uiuc.edu +deimos.cecalc.ula.ve +planetlabnode-2.docomolabs-usa.com +planetlab1.iitr.ernet.in +planetlab2.cse.msu.edu +planetlab2.cs.duke.edu +planetlab1.cs.uoi.gr +planetlab0.ias.csusb.edu +planetlab5.cs.duke.edu +planetlab-04.naist.jp +pl1.cewit.stonybrook.edu +pli1-br-3.hpl.hp.com +pli2-br-2.hpl.hp.com +planetlab2.iii.u-tokyo.ac.jp +planetlab1.ece.ucdavis.edu +pli1-br-2.hpl.hp.com +pli2-br-1.hpl.hp.com +onelab10.pl.sophia.inria.fr +pli1-tlnx.hpl.hp.com +pluto.cs.binghamton.edu +pl2.cewit.stonybrook.edu +planetlab1.cs.wayne.edu +planetlab-02.naist.jp +planetlab2.olsztyn.rd.tp.pl +ple1.cesnet.cz +planlab1.cs.caltech.edu +planlab2.cs.caltech.edu +planetlab2.uc.edu +planetlab3.csail.mit.edu +planetlab-03.naist.jp +planetlab2.sics.se +planetlab-1.vuse.vanderbilt.edu +planetlab-2.vuse.vanderbilt.edu +planetlab-3.ics.uci.edu +planetlab1.n.info.eng.osaka-cu.ac.jp +node1.planet-lab.titech.ac.jp +node3.planet-lab.titech.ac.jp +node4.planet-lab.titech.ac.jp +planetlab3.n.info.eng.osaka-cu.ac.jp +pn1-planetlab.huawei.com +pn2-planetlab.huawei.com +node0.planetlab.etl.luc.edu +planetlab1.keldysh.ru +planetlab2.keldysh.ru +planetlab1.cs.dartmouth.edu +planetlab1.simula.no +itchy.cs.uga.edu +planetlab-1.ece.iastate.edu +mnc2.pusan.ac.kr +planetlab1.iin-bit.com.cn +planetlab2.nileu.edu.eg +planetlab1.cse.msu.edu +planetlabnode-1.docomolabs-usa.com +mnc1.pusan.ac.kr +planetlab2.iitr.ernet.in +fobos.cecalc.ula.ve +planetlab3.mini.pw.edu.pl +plab-1.sinp.msu.ru +planetlabpc0.upf.edu +planet1.unipr.it +planetlab1.jhu.edu +planetlab1.mnlab.cti.depaul.edu +inriarennes2.irisa.fr +plab2.nec-labs.com +ampelos.ipv6.lip6.fr +planetlab2.georgetown.edu +planetlab2.byu.edu +planet2.unipr.it +plab2.create-net.org +plgmu3.ite.gmu.edu +planetlab1.cs.unc.edu +planetlab2.informatik.uni-wuerzburg.de +planetlab-node3.it-sudparis.eu +pl1.6test.edu.cn +pln.zju.edu.cn +planet02.hhi.fraunhofer.de +pl2.uni-rostock.de +prometeusz.we.po.opole.pl +mars.planetlab.haw-hamburg.de +planetlab2.cs.uoi.gr +planetlab2.dit.upm.es +lim-planetlab-1.univ-reunion.fr +planetlab4.cnds.jhu.edu +planetslug5.cse.ucsc.edu +earth.cs.brown.edu +pl2.6test.edu.cn +inriarennes1.irisa.fr 
+planetlab4.csres.utexas.edu +planetlab1.tlm.unavarra.es +plab2.larc.usp.br +planetlab1.u-strasbg.fr +planetlab-3.cs.ucy.ac.cy +planetlab5.cs.uiuc.edu +scratchy.comlab.bth.se +planetlab-04.vt.nodes.planet-lab.org +planetlab1.urv.cat +planetlab3.cse.nd.edu +planetlab-um00.di.uminho.pt +planetlab4.rutgers.edu +planetlab2.acis.ufl.edu +planetlab-1.ssvl.kth.se +planetlab-2.cs.unibas.ch +node2pl.planet-lab.telecom-lille1.eu +planetlab2.xeno.cl.cam.ac.uk +planetlab4.cs.st-andrews.ac.uk +merkur.planetlab.haw-hamburg.de +host2.planetlab.informatik.tu-darmstadt.de +ple2.cesnet.cz +planetlab-2.ida.liu.se +planetlab-1.imag.fr +medea.inf.uth.gr +planetlab2.jhu.edu +planetlab2.tau.ac.il +planetlab2.nakao-lab.org +planetlab1.tmit.bme.hu +planetlab1.mta.ac.il +wlab02.pl.sophia.inria.fr +planetlab9.millennium.berkeley.edu +ple2.tu.koszalin.pl +planetlab2.science.unitn.it +planetlab2.polito.it +planet1.l3s.uni-hannover.de +plab1.create-net.org +planetlab2.pjwstk.edu.pl +planetlab2.cs.uit.no +planetlab1.cs.uit.no +zoi.di.uoa.gr +planetlab2.rd.tut.fi +146-179.surfsnel.dsl.internl.net +planet1.inf.tu-dresden.de +planet1.dsp.ac.cn +vicky.planetlab.ntua.gr +planetlab2.fri.uni-lj.si +planetlab2.ionio.gr +planetlab01.dis.unina.it +planetlab2.extern.kuleuven.be +planetlab-1.research.netlab.hut.fi +planetlab-2.research.netlab.hut.fi +planetlab1.iitkgp.ac.in +planetlab3.xeno.cl.cam.ac.uk +peeramide.irisa.fr +planetlab04.cnds.unibe.ch +gschembra3.diit.unict.it +planetlab02.tkn.tu-berlin.de +dplanet1.uoc.edu +planetlab-1.cs.unibas.ch +planet2.servers.ua.pt +planetlab1.xeno.cl.cam.ac.uk +planetlab1.utt.fr +onelab3.info.ucl.ac.be +planetlab-node1.it-sudparis.eu +planet-lab-node1.netgroup.uniroma2.it +planetlab-2.di.fc.ul.pt +planetlab-node-01.ucd.ie +planetlab02.dis.unina.it +planetlab1.tau.ac.il +planet2.elte.hu +node1pl.planet-lab.telecom-lille1.eu +planet1.servers.ua.pt +planetlab1.upc.es +planetlab3.hiit.fi +planetlab2.ics.forth.gr +planetlab1.postel.org +pl1.csl.utoronto.ca +planetlab3.cs.st-andrews.ac.uk +planet1.zib.de +planetlab1.willab.fi +planetlab1.eurecom.fr +ple2.ipv6.lip6.fr +planetlab1.unineuchatel.ch +dplanet2.uoc.edu +host3-plb.loria.fr +lefthand.eecs.harvard.edu +planetlabpc1.upf.edu +planetlab-2.man.poznan.pl +ple1.dmcs.p.lodz.pl +planetlab1.fct.ualg.pt +planetlab3.cs.columbia.edu +ple02.fc.univie.ac.at +iraplab1.iralab.uni-karlsruhe.de +planetlab1.informatik.uni-wuerzburg.de +planetlab2.unineuchatel.ch +planetlab-1.iscte.pt +netapp7.cs.kookmin.ac.kr +planetlab2.utt.fr +planetlab-1.ing.unimo.it +planetlab1.informatik.uni-erlangen.de +planetlab01.tkn.tu-berlin.de +onelab2.info.ucl.ac.be +planetlab1.cesnet.cz +planetlab1.upm.ro +planetlab-1.tagus.ist.utl.pt +planetlab1.dojima.wide.ad.jp +planetlab1.singaren.net.sg +planetlab1.montefiore.ulg.ac.be +planetlab1-buenosaires.lan.redclara.net +ple01.fc.univie.ac.at +iraplab2.iralab.uni-karlsruhe.de +planetlab3.csee.usf.edu +netapp6.cs.kookmin.ac.kr +node1.planetlab.mathcs.emory.edu +planetlab2.cs.aueb.gr +plab2.ple.silweb.pl +planetlab3.upc.es +plnode01.cs.mu.oz.au +planet2.l3s.uni-hannover.de +righthand.eecs.harvard.edu +planetlab1.csg.uzh.ch +planetlab6.flux.utah.edu +planetlab7.flux.utah.edu +node2.planetlab.mathcs.emory.edu +planetlab2.ntu.nodes.planet-lab.org +planetlab1.plab.ege.edu.tr +planetlab1.informatik.uni-goettingen.de +planetlab6.goto.info.waseda.ac.jp +planetlab2.bgu.ac.il +planetlab2.montefiore.ulg.ac.be +planetlab1.cs.colorado.edu +pl2.pku.edu.cn +planet1.elte.hu +miranda.planetlab.cs.umd.edu +pl2.sos.info.hiroshima-cu.ac.jp 
+planetlab2.willab.fi +planet6.cs.ucsb.edu +planetlab-2.elisa.cpsc.ucalgary.ca +planet2.pnl.nitech.ac.jp +planet2.zib.de +planetlab2.clemson.edu +planetlab6.csres.utexas.edu +planetlab1.engr.uconn.edu +planetlab2.upc.es +planetlab1.exp-math.uni-essen.de +planetlab3.informatik.uni-erlangen.de +planetlab1.ewi.tudelft.nl +planetlab2.ewi.tudelft.nl +planetlab2.csg.uzh.ch +planetlab1.pjwstk.edu.pl +onelab1.info.ucl.ac.be +planetlab1.di.fct.unl.pt +planetlab-2.ing.unimo.it +planetlab4.hiit.fi +planetvs2.informatik.uni-stuttgart.de +planetlab2lannion.elibel.tm.fr +planetlab1.ionio.gr +planetlab-3.imperial.ac.uk +planetlab1.ifi.uio.no +planetlab1.ci.pwr.wroc.pl +planetlab-um10.di.uminho.pt +planetlab2.ifi.uio.no +planetlab2.cs.vu.nl +planetlab-4.imperial.ac.uk +planetlab2.fct.ualg.pt +vn4.cse.wustl.edu +planetlab-2.fokus.fraunhofer.de +host4-plb.loria.fr +planetlab1.bgu.ac.il +planetlab4.n.info.eng.osaka-cu.ac.jp +plnodea.plaust.edu.cn +planetlab-2.webedu.ccu.edu.tw +planetlab3.wail.wisc.edu +planet2.inf.tu-dresden.de +planet-lab-node2.netgroup.uniroma2.it +planetlab2.cs.ucla.edu +planetlab1.cs.ucla.edu +onelab-2.fhi-fokus.de +planet11.csc.ncsu.edu +planetlab5.eecs.umich.edu +planet12.csc.ncsu.edu +ebb.colgate.edu +pli1-br-1.hpl.hp.com +planetlab7.cs.duke.edu +pl2snu.koren.kr +planetlab-03.vt.nodes.planet-lab.org +planetlab6.cs.uiuc.edu +pl1.pku.edu.cn +planetslug7.cse.ucsc.edu +planetlab2.plab.ege.edu.tr +planetlab4.cs.uchicago.edu +planetlab2.cesnet.cz +planetlab-6.ece.iastate.edu +planetlab3.netmedia.gist.ac.kr +planetlab1.ucsd.edu +planetlab2.ucsd.edu +ricepl-5.cs.rice.edu +planet4.cc.gt.atl.ga.us +planetlab2.singaren.net.sg +planet1.pnl.nitech.ac.jp +planetlab1.ustc.edu.cn +pnode2.pdcc-ntu.singaren.net.sg +planetlab-n2.wand.net.nz +planetlab1.clemson.edu +planetlab3.rutgers.edu +planetlab1.polito.it +gschembra4.diit.unict.it +planetlab2.s3.kth.se +planetlab1.ecs.vuw.ac.nz +planetlab2.exp-math.uni-essen.de +planetlab1.s3.kth.se +nodeb.howard.edu +pli1-pa-5.hpl.hp.com +planetlab2.een.orst.edu +pl1.ucs.indiana.edu +planetlab2.cs.columbia.edu +planetlab1.science.unitn.it +planetlab2.ecs.vuw.ac.nz +ple2.dmcs.p.lodz.pl +planet1.scs.stanford.edu +planetlab1.cs.umass.edu +planetlab-2.tagus.ist.utl.pt +planetlab3.cs.uchicago.edu +planetlab2.di.fct.unl.pt +planet2.scs.stanford.edu +planetlab1.koganei.wide.ad.jp +planetlab4.wail.wisc.edu +plnode02.cs.mu.oz.au +planetlab2.netlab.uky.edu +planetlab2.eurecom.fr +planetlab2.pop-pa.rnp.br +planetlab1.byu.edu +planetlab2.cs.umass.edu +plab2.cs.ust.hk +planetlab-02.ece.uprm.edu +planetlab2.tamu.edu +pl2.rcc.uottawa.ca +planetlab-2.cs.auckland.ac.nz +planetlab02.mpi-sws.mpg.de +pl1.rcc.uottawa.ca +planetlab2.cs.ubc.ca +planetlab6.cs.duke.edu +planetlab6.csail.mit.edu +saturn.cs.brown.edu +planetlab1.utdallas.edu +ricepl-4.cs.rice.edu +planetlab1.netlab.uky.edu +pnode1.pdcc-ntu.singaren.net.sg +nodea.howard.edu +planetlab02.just.edu.jo +planet5.cs.ucsb.edu +cs-planetlab3.cs.surrey.sfu.ca +pl2.ucs.indiana.edu +planetlab5.csres.utexas.edu +planetlab4.csee.usf.edu +cs-planetlab4.cs.surrey.sfu.ca +planetlab1.pop-pa.rnp.br +planetlab2.cs.uml.edu +planetlab1.cs.uml.edu +planetlab5.williams.edu +planetslug4.cse.ucsc.edu +pli1-pa-4.hpl.hp.com +planetlab4.williams.edu +planetlab3.comp.nus.edu.sg +mtuplanetlab2.cs.mtu.edu +planetlab2.utdallas.edu +server3.planetlab.iit-tech.net +planetlab2.koganei.wide.ad.jp +planetlab-1.calpoly-netlab.net +nis-planet1.doshisha.ac.jp +planetlab2.csee.usf.edu +planetlab1.just.edu.jo +planetlab-2.ece.iastate.edu 
+server2.planetlab.iit-tech.net +planetlab1.ntu.nodes.planet-lab.org +planetlab-1.cs.auckland.ac.nz +pl2.eng.monash.edu.au +planetlab4.singaren.net.sg +server4.planetlab.iit-tech.net +planetlab2.csuohio.edu +planetlab1.csuohio.edu +planetlab-1.cs.uic.edu +planetlab-2.cs.uic.edu +planetlab2.ci.pwr.wroc.pl +flow.colgate.edu +planetlab3.di.unito.it +planetlab05.mpi-sws.mpg.de +planetlab-01.vt.nodes.planet-lab.org +planetlab2.tsuniv.edu +jupiter.planetlab.carleton.ca +planetlab1.unl.edu +planetlab1.georgetown.edu +planetlab1.rutgers.edu +planetlab1.sics.se +planetlab2.mnlab.cti.depaul.edu +planetlab1lannion.elibel.tm.fr +planetlab1.cs.vu.nl diff --git a/process.py b/process.py new file mode 100644 index 0000000..a0ce5e5 --- /dev/null +++ b/process.py @@ -0,0 +1,29 @@ +from __future__ import with_statement +from lastfm import get_friends, init +import sys +import time + + +def process(filename): + i = 0 + with open(filename) as ih: + to_do = set(line.strip() for line in ih) + done = set([]) + try: + with open(filename + ".out") as ih: + done = set(line.strip().split("\t")[0] for line in ih) + except IOError: + pass + with open(filename + ".out", "a") as oh: + for user in to_do - done: + i += 1 + friends = get_friends(user) + friends = [friend["name"] for friend in friends] + if friends: + oh.write(user.decode("utf8").encode("utf8") + "\t" + + u"\t".join(friends).encode("utf8") + "\n") + time.sleep(0.1) + +if __name__ == "__main__": + init(sys.argv[1]) + process(sys.argv[2]) diff --git a/requests/__init__.py b/requests/__init__.py new file mode 100644 index 0000000..e3300cf --- /dev/null +++ b/requests/__init__.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- + +# __ +# /__) _ _ _ _ _/ _ +# / ( (- (/ (/ (- _) / _) +# / + +""" +requests +~~~~~~~~ + +:copyright: (c) 2012 by Kenneth Reitz. +:license: ISC, see LICENSE for more details. + +""" + +__title__ = 'requests' +__version__ = '0.10.0' +__build__ = 0x001000 +__author__ = 'Kenneth Reitz' +__license__ = 'ISC' +__copyright__ = 'Copyright 2012 Kenneth Reitz' + + + +from . import utils +from .models import Request, Response +from .api import request, get, head, post, patch, put, delete, options +from .sessions import session, Session +from .status_codes import codes +from .exceptions import ( + RequestException, Timeout, URLRequired, + TooManyRedirects, HTTPError, ConnectionError +) diff --git a/requests/api.py b/requests/api.py new file mode 100644 index 0000000..b7d4158 --- /dev/null +++ b/requests/api.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- + +""" +requests.api +~~~~~~~~~~~~ + +This module implements the Requests API. + +:copyright: (c) 2012 by Kenneth Reitz. +:license: ISC, see LICENSE for more details. + +""" + +from . import sessions + +def request(method, url, **kwargs): + """Constructs and sends a :class:`Request <Request>`. + Returns :class:`Response <Response>` object. + + :param method: method for the new :class:`Request` object. + :param url: URL for the new :class:`Request` object. + :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. + :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`. + :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. + :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. + :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload. 
+ :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. + :param timeout: (optional) Float describing the timeout of the request. + :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. + :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. + :param return_response: (optional) If False, an un-sent Request object will returned. + :param session: (optional) A :class:`Session` object to be used for the request. + :param config: (optional) A configuration dictionary. + :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. + :param prefetch: (optional) if ``True``, the response content will be immediately downloaded. + """ + + s = kwargs.pop('session') if 'session' in kwargs else sessions.session() + return s.request(method=method, url=url, **kwargs) + + + +def get(url, **kwargs): + """Sends a GET request. Returns :class:`Response` object. + + :param url: URL for the new :class:`Request` object. + :param **kwargs: Optional arguments that ``request`` takes. + """ + + kwargs.setdefault('allow_redirects', True) + return request('get', url, **kwargs) + + +def options(url, **kwargs): + """Sends a OPTIONS request. Returns :class:`Response` object. + + :param url: URL for the new :class:`Request` object. + :param **kwargs: Optional arguments that ``request`` takes. + """ + + kwargs.setdefault('allow_redirects', True) + return request('options', url, **kwargs) + + +def head(url, **kwargs): + """Sends a HEAD request. Returns :class:`Response` object. + + :param url: URL for the new :class:`Request` object. + :param **kwargs: Optional arguments that ``request`` takes. + """ + + kwargs.setdefault('allow_redirects', True) + return request('head', url, **kwargs) + + +def post(url, data=None, **kwargs): + """Sends a POST request. Returns :class:`Response` object. + + :param url: URL for the new :class:`Request` object. + :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`. + :param **kwargs: Optional arguments that ``request`` takes. + """ + + return request('post', url, data=data, **kwargs) + + +def put(url, data=None, **kwargs): + """Sends a PUT request. Returns :class:`Response` object. + + :param url: URL for the new :class:`Request` object. + :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`. + :param **kwargs: Optional arguments that ``request`` takes. + """ + + return request('put', url, data=data, **kwargs) + + +def patch(url, data=None, **kwargs): + """Sends a PATCH request. Returns :class:`Response` object. + + :param url: URL for the new :class:`Request` object. + :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`. + :param **kwargs: Optional arguments that ``request`` takes. + """ + + return request('patch', url, data=data, **kwargs) + + +def delete(url, **kwargs): + """Sends a DELETE request. Returns :class:`Response` object. + + :param url: URL for the new :class:`Request` object. + :param **kwargs: Optional arguments that ``request`` takes. + """ + + return request('delete', url, **kwargs) diff --git a/requests/async.py b/requests/async.py new file mode 100644 index 0000000..9488447 --- /dev/null +++ b/requests/async.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- + +""" +requests.async +~~~~~~~~~~~~~~ + +This module contains an asynchronous replica of ``requests.api``, powered +by gevent. 
All API methods return a ``Request`` instance (as opposed to +``Response``). A list of requests can be sent with ``map()``. +""" + +try: + import gevent + from gevent import monkey as curious_george + from gevent.pool import Pool +except ImportError: + raise RuntimeError('Gevent is required for requests.async.') + +# Monkey-patch. +curious_george.patch_all(thread=False) + +from . import api + + +__all__ = ( + 'map', + 'get', 'options', 'head', 'post', 'put', 'patch', 'delete', 'request' +) + + +def patched(f): + """Patches a given API function to not send.""" + + def wrapped(*args, **kwargs): + + kwargs['return_response'] = False + kwargs['prefetch'] = True + + config = kwargs.get('config', {}) + config.update(safe_mode=True) + + kwargs['config'] = config + + return f(*args, **kwargs) + + return wrapped + + +def send(r, pool=None): + """Sends the request object using the specified pool. If a pool isn't + specified this method blocks. Pools are useful because you can specify size + and can hence limit concurrency.""" + + if pool != None: + return pool.spawn(r.send) + + return gevent.spawn(r.send) + + +# Patched requests.api functions. +get = patched(api.get) +options = patched(api.options) +head = patched(api.head) +post = patched(api.post) +put = patched(api.put) +patch = patched(api.patch) +delete = patched(api.delete) +request = patched(api.request) + + +def map(requests, prefetch=True, size=None): + """Concurrently converts a list of Requests to Responses. + + :param requests: a collection of Request objects. + :param prefetch: If False, the content will not be downloaded immediately. + :param size: Specifies the number of requests to make at a time. If None, no throttling occurs. + """ + + requests = list(requests) + + pool = Pool(size) if size else None + jobs = [send(r, pool) for r in requests] + gevent.joinall(jobs) + + if prefetch: + [r.response.content for r in requests] + + return [r.response for r in requests]
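requests/async.py above is the gevent fan-out layer: each patched helper returns an unsent Request, and map() resolves a batch concurrently. A hedged sketch, using a placeholder key and made-up usernames:

    from requests import async

    reqs = [async.get("http://ws.audioscrobbler.com/2.0/",
                      params={"method": "user.getInfo", "user": u,
                              "api_key": "placeholder", "format": "json"})
            for u in ("alice", "bob")]
    # size bounds concurrency with a gevent Pool; prefetch=True (the default)
    # also reads each body before returning.
    for r in async.map(reqs, size=2):
        print r.status_code

The crawler itself never imports this module; it simply ships with the vendored library.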
\ No newline at end of file diff --git a/requests/auth.py b/requests/auth.py new file mode 100644 index 0000000..d1da4ee --- /dev/null +++ b/requests/auth.py @@ -0,0 +1,138 @@ +# -*- coding: utf-8 -*- + +""" +requests.auth +~~~~~~~~~~~~~ + +This module contains the authentication handlers for Requests. +""" + +import time +import hashlib + +from base64 import b64encode +from urlparse import urlparse + +from .utils import randombytes, parse_dict_header + + + +def _basic_auth_str(username, password): + """Returns a Basic Auth string.""" + return 'Basic %s' % b64encode('%s:%s' % (username, password)) + + +class AuthBase(object): + """Base class that all auth implementations derive from""" + + def __call__(self, r): + raise NotImplementedError('Auth hooks must be callable.') + + +class HTTPBasicAuth(AuthBase): + """Attaches HTTP Basic Authentication to the given Request object.""" + def __init__(self, username, password): + self.username = str(username) + self.password = str(password) + + def __call__(self, r): + r.headers['Authorization'] = _basic_auth_str(self.username, self.password) + return r + + +class HTTPProxyAuth(HTTPBasicAuth): + """Attaches HTTP Proxy Authenetication to a given Request object.""" + def __call__(self, r): + r.headers['Proxy-Authorization'] = _basic_auth_str(self.username, self.password) + return r + + +class HTTPDigestAuth(AuthBase): + """Attaches HTTP Digest Authentication to the given Request object.""" + def __init__(self, username, password): + self.username = username + self.password = password + + def handle_401(self, r): + """Takes the given response and tries digest-auth, if needed.""" + + s_auth = r.headers.get('www-authenticate', '') + + if 'digest' in s_auth.lower(): + + last_nonce = '' + nonce_count = 0 + + chal = parse_dict_header(s_auth.replace('Digest ', '')) + + realm = chal['realm'] + nonce = chal['nonce'] + qop = chal.get('qop') + algorithm = chal.get('algorithm', 'MD5') + opaque = chal.get('opaque', None) + + algorithm = algorithm.upper() + # lambdas assume digest modules are imported at the top level + if algorithm == 'MD5': + H = lambda x: hashlib.md5(x).hexdigest() + elif algorithm == 'SHA': + H = lambda x: hashlib.sha1(x).hexdigest() + # XXX MD5-sess + KD = lambda s, d: H("%s:%s" % (s, d)) + + if H is None: + return None + + # XXX not implemented yet + entdig = None + p_parsed = urlparse(r.request.url) + path = p_parsed.path + if p_parsed.query: + path += '?' + p_parsed.query + + A1 = '%s:%s:%s' % (self.username, realm, self.password) + A2 = '%s:%s' % (r.request.method, path) + + if qop == 'auth': + if nonce == last_nonce: + nonce_count += 1 + else: + nonce_count = 1 + last_nonce = nonce + + ncvalue = '%08x' % nonce_count + cnonce = (hashlib.sha1("%s:%s:%s:%s" % ( + nonce_count, nonce, time.ctime(), randombytes(8))) + .hexdigest()[:16] + ) + noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2)) + respdig = KD(H(A1), noncebit) + elif qop is None: + respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) + else: + # XXX handle auth-int. + return None + + # XXX should the partial digests be encoded too? 
+ base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ + 'response="%s"' % (self.username, realm, nonce, path, respdig) + if opaque: + base += ', opaque="%s"' % opaque + if entdig: + base += ', digest="%s"' % entdig + base += ', algorithm="%s"' % algorithm + if qop: + base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce) + + r.request.headers['Authorization'] = 'Digest %s' % (base) + r.request.send(anyway=True) + _r = r.request.response + _r.history.append(r) + + return _r + + return r + + def __call__(self, r): + r.register_hook('response', self.handle_401) + return r diff --git a/requests/defaults.py b/requests/defaults.py new file mode 100644 index 0000000..424d373 --- /dev/null +++ b/requests/defaults.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- + +""" +requests.defaults +~~~~~~~~~~~~~~~~~ + +This module provides the Requests configuration defaults. + +Configurations: + +:base_headers: Default HTTP headers. +:verbose: Stream to write request logging to. +:max_redirects: Maximum number of redirects allowed within a request.s +:keep_alive: Reuse HTTP Connections? +:max_retries: The number of times a request should be retried in the event of a connection failure. +:danger_mode: If true, Requests will raise errors immediately. +:safe_mode: If true, Requests will catch all errors. +:pool_maxsize: The maximium size of an HTTP connection pool. +:pool_connections: The number of active HTTP connection pools to use. +""" + +from . import __version__ + +defaults = dict() + + +defaults['base_headers'] = { + 'User-Agent': 'python-requests/%s' % __version__, + 'Accept-Encoding': ', '.join(('identity', 'deflate', 'compress', 'gzip')), + 'Accept': '*/*' +} + +defaults['verbose'] = None +defaults['max_redirects'] = 30 +defaults['pool_connections'] = 10 +defaults['pool_maxsize'] = 10 +defaults['max_retries'] = 0 +defaults['danger_mode'] = False +defaults['safe_mode'] = False +defaults['keep_alive'] = True diff --git a/requests/exceptions.py b/requests/exceptions.py new file mode 100644 index 0000000..c7b98e6 --- /dev/null +++ b/requests/exceptions.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- + +""" +requests.exceptions +~~~~~~~~~~~~~~~~~~~ + +This module contains the set of Requests' exceptions. + +""" + +class RequestException(Exception): + """There was an ambiguous exception that occurred while handling your + request.""" + +class HTTPError(RequestException): + """An HTTP error occurred.""" + +class ConnectionError(RequestException): + """A Connection error occurred.""" + +class SSLError(ConnectionError): + """An SSL error occurred.""" + +class Timeout(RequestException): + """The request timed out.""" + +class URLRequired(RequestException): + """A valid URL is required to make a request.""" + +class TooManyRedirects(RequestException): + """Too many redirects.""" diff --git a/requests/hooks.py b/requests/hooks.py new file mode 100644 index 0000000..3560b89 --- /dev/null +++ b/requests/hooks.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- + +""" +requests.hooks +~~~~~~~~~~~~~~ + +This module provides the capabilities for the Requests hooks system. + +Available hooks: + +``args``: + A dictionary of the arguments being sent to Request(). + +``pre_request``: + The Request object, directly before being sent. + +``post_request``: + The Request object, directly after being sent. + +``response``: + The response generated from a Request. 
+ +""" + +import traceback + + +HOOKS = ('args', 'pre_request', 'post_request', 'response') + + +def dispatch_hook(key, hooks, hook_data): + """Dispatches a hook dictionary on a given piece of data.""" + + hooks = hooks or dict() + + if key in hooks: + hooks = hooks.get(key) + + if hasattr(hooks, '__call__'): + hooks = [hooks] + + for hook in hooks: + try: + hook_data = hook(hook_data) or hook_data + except Exception: + traceback.print_exc() + + return hook_data diff --git a/requests/models.py b/requests/models.py new file mode 100644 index 0000000..84a2ec6 --- /dev/null +++ b/requests/models.py @@ -0,0 +1,784 @@ +# -*- coding: utf-8 -*- + +""" +requests.models +~~~~~~~~~~~~~~~ + +This module contains the primary objects that power Requests. +""" + +import os +import urllib + +from urlparse import urlparse, urlunparse, urljoin, urlsplit +from datetime import datetime + +from .hooks import dispatch_hook, HOOKS +from .structures import CaseInsensitiveDict +from .status_codes import codes +from .packages import oreos +from .auth import HTTPBasicAuth, HTTPProxyAuth +from .packages.urllib3.response import HTTPResponse +from .packages.urllib3.exceptions import MaxRetryError +from .packages.urllib3.exceptions import SSLError as _SSLError +from .packages.urllib3.exceptions import HTTPError as _HTTPError +from .packages.urllib3 import connectionpool, poolmanager +from .packages.urllib3.filepost import encode_multipart_formdata +from .exceptions import ( + ConnectionError, HTTPError, RequestException, Timeout, TooManyRedirects, + URLRequired, SSLError) +from .utils import ( + get_encoding_from_headers, stream_decode_response_unicode, + stream_decompress, guess_filename, requote_path) + +# Import chardet if it is available. +try: + import chardet +except ImportError: + pass + +REDIRECT_STATI = (codes.moved, codes.found, codes.other, codes.temporary_moved) + + + +class Request(object): + """The :class:`Request <Request>` object. It carries out all functionality of + Requests. Recommended interface is with the Requests functions. + """ + + def __init__(self, + url=None, + headers=dict(), + files=None, + method=None, + data=dict(), + params=dict(), + auth=None, + cookies=None, + timeout=None, + redirect=False, + allow_redirects=False, + proxies=None, + hooks=None, + config=None, + _poolmanager=None, + verify=None, + session=None): + + #: Float describes the timeout of the request. + # (Use socket.setdefaulttimeout() as fallback) + self.timeout = timeout + + #: Request URL. + self.url = url + + #: Dictionary of HTTP Headers to attach to the :class:`Request <Request>`. + self.headers = dict(headers or []) + + #: Dictionary of files to multipart upload (``{filename: content}``). + self.files = files + + #: HTTP Method to use. + self.method = method + + #: Dictionary or byte of request body data to attach to the + #: :class:`Request <Request>`. + self.data = None + + #: Dictionary or byte of querystring data to attach to the + #: :class:`Request <Request>`. + self.params = None + + #: True if :class:`Request <Request>` is part of a redirect chain (disables history + #: and HTTPError storage). + self.redirect = redirect + + #: Set to True if full redirects are allowed (e.g. re-POST-ing of data at new ``Location``) + self.allow_redirects = allow_redirects + + # Dictionary mapping protocol to the URL of the proxy (e.g. 
{'http': 'foo.bar:3128'}) + self.proxies = dict(proxies or []) + + self.data, self._enc_data = self._encode_params(data) + self.params, self._enc_params = self._encode_params(params) + + #: :class:`Response <Response>` instance, containing + #: content and metadata of HTTP Response, once :attr:`sent <send>`. + self.response = Response() + + #: Authentication tuple or object to attach to :class:`Request <Request>`. + self.auth = auth + + #: CookieJar to attach to :class:`Request <Request>`. + self.cookies = dict(cookies or []) + + #: Dictionary of configurations for this request. + self.config = dict(config or []) + + #: True if Request has been sent. + self.sent = False + + #: Event-handling hooks. + self.hooks = {} + + for event in HOOKS: + self.hooks[event] = [] + + hooks = hooks or {} + + for (k, v) in hooks.items(): + self.register_hook(event=k, hook=v) + + #: Session. + self.session = session + + #: SSL Verification. + self.verify = verify + + if headers: + headers = CaseInsensitiveDict(self.headers) + else: + headers = CaseInsensitiveDict() + + # Add configured base headers. + for (k, v) in self.config.get('base_headers', {}).items(): + if k not in headers: + headers[k] = v + + self.headers = headers + self._poolmanager = _poolmanager + + # Pre-request hook. + r = dispatch_hook('pre_request', hooks, self) + self.__dict__.update(r.__dict__) + + + def __repr__(self): + return '<Request [%s]>' % (self.method) + + + def _build_response(self, resp): + """Build internal :class:`Response <Response>` object + from given response. + """ + + def build(resp): + + response = Response() + + # Pass settings over. + response.config = self.config + + if resp: + + # Fallback to None if there's no status_code, for whatever reason. + response.status_code = getattr(resp, 'status', None) + + # Make headers case-insensitive. + response.headers = CaseInsensitiveDict(getattr(resp, 'headers', None)) + + # Set encoding. + response.encoding = get_encoding_from_headers(response.headers) + + # Start off with our local cookies. + cookies = self.cookies or dict() + + # Add new cookies from the server. + if 'set-cookie' in response.headers: + cookie_header = response.headers['set-cookie'] + cookies = oreos.dict_from_string(cookie_header) + + # Save cookies in Response. + response.cookies = cookies + + # No exceptions were harmed in the making of this request. + response.error = getattr(resp, 'error', None) + + # Save original response for later. + response.raw = resp + response.url = self.full_url.decode('utf-8') + + return response + + history = [] + + r = build(resp) + cookies = self.cookies + self.cookies.update(r.cookies) + + if r.status_code in REDIRECT_STATI and not self.redirect: + + while ( + ('location' in r.headers) and + ((r.status_code is codes.see_other) or (self.allow_redirects)) + ): + + if not len(history) < self.config.get('max_redirects'): + raise TooManyRedirects() + + history.append(r) + + url = r.headers['location'] + + # Handle redirection without scheme (see: RFC 1808 Section 4) + if url.startswith('//'): + parsed_rurl = urlparse(r.url) + url = '%s:%s' % (parsed_rurl.scheme, url) + + # Facilitate non-RFC2616-compliant 'location' headers + # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') + if not urlparse(url).netloc: + url = urljoin(r.url, url) + + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 + if r.status_code is codes.see_other: + method = 'GET' + else: + method = self.method + + # Remove the cookie headers that were sent. 
+ headers = self.headers + try: + del headers['Cookie'] + except KeyError: + pass + + request = Request( + url=url, + headers=headers, + files=self.files, + method=method, + params=self.session.params, + auth=self.auth, + cookies=cookies, + redirect=True, + config=self.config, + timeout=self.timeout, + _poolmanager=self._poolmanager, + proxies = self.proxies, + verify = self.verify, + session = self.session + ) + + request.send() + cookies.update(request.response.cookies) + r = request.response + self.cookies.update(r.cookies) + + r.history = history + + self.response = r + self.response.request = self + self.response.cookies.update(self.cookies) + + + @staticmethod + def _encode_params(data): + """Encode parameters in a piece of data. + + If the data supplied is a dictionary, encodes each parameter in it, and + returns a list of tuples containing the encoded parameters, and a urlencoded + version of that. + + Otherwise, assumes the data is already encoded appropriately, and + returns it twice. + """ + + if hasattr(data, '__iter__'): + data = dict(data) + + if hasattr(data, 'items'): + result = [] + for k, vs in data.items(): + for v in isinstance(vs, list) and vs or [vs]: + result.append((k.encode('utf-8') if isinstance(k, unicode) else k, + v.encode('utf-8') if isinstance(v, unicode) else v)) + return result, urllib.urlencode(result, doseq=True) + else: + return data, data + + @property + def full_url(self): + """Build the actual URL to use.""" + + if not self.url: + raise URLRequired() + + # Support for unicode domain names and paths. + scheme, netloc, path, params, query, fragment = urlparse(self.url) + + if not scheme: + raise ValueError("Invalid URL %r: No schema supplied" %self.url) + + netloc = netloc.encode('idna') + + if isinstance(path, unicode): + path = path.encode('utf-8') + + path = requote_path(path) + + url = str(urlunparse([ scheme, netloc, path, params, query, fragment ])) + + if self._enc_params: + if urlparse(url).query: + return '%s&%s' % (url, self._enc_params) + else: + return '%s?%s' % (url, self._enc_params) + else: + return url + + @property + def path_url(self): + """Build the path URL to use.""" + + url = [] + + p = urlsplit(self.full_url) + + # Proxies use full URLs. + if p.scheme in self.proxies: + return self.full_url + + path = p.path + if not path: + path = '/' + url.append(path) + + query = p.query + if query: + url.append('?') + url.append(query) + + return ''.join(url) + + + def register_hook(self, event, hook): + """Properly register a hook.""" + + return self.hooks[event].append(hook) + + + def send(self, anyway=False, prefetch=False): + """Sends the request. Returns True of successful, false if not. + If there was an HTTPError during transmission, + self.response.status_code will contain the HTTPError code. + + Once a request is successfully sent, `sent` will equal True. + + :param anyway: If True, request will be sent, even if it has + already been sent. + """ + + # Build the URL + url = self.full_url + + # Logging + if self.config.get('verbose'): + self.config.get('verbose').write('%s %s %s\n' % ( + datetime.now().isoformat(), self.method, url + )) + + # Nottin' on you. + body = None + content_type = None + + # Multi-part file uploads. 
+ if self.files: + if not isinstance(self.data, basestring): + + try: + fields = self.data.copy() + except AttributeError: + fields = dict(self.data) + + for (k, v) in self.files.items(): + # support for explicit filename + if isinstance(v, (tuple, list)): + fn, fp = v + else: + fn = guess_filename(v) or k + fp = v + fields.update({k: (fn, fp.read())}) + + (body, content_type) = encode_multipart_formdata(fields) + else: + pass + # TODO: Conflict? + else: + if self.data: + + body = self._enc_data + if isinstance(self.data, basestring): + content_type = None + else: + content_type = 'application/x-www-form-urlencoded' + + # Add content-type if it wasn't explicitly provided. + if (content_type) and (not 'content-type' in self.headers): + self.headers['Content-Type'] = content_type + + if self.auth: + if isinstance(self.auth, tuple) and len(self.auth) == 2: + # special-case basic HTTP auth + self.auth = HTTPBasicAuth(*self.auth) + + # Allow auth to make its changes. + r = self.auth(self) + + # Update self to reflect the auth changes. + self.__dict__.update(r.__dict__) + + _p = urlparse(url) + proxy = self.proxies.get(_p.scheme) + + if proxy: + conn = poolmanager.proxy_from_url(proxy) + _proxy = urlparse(proxy) + if '@' in _proxy.netloc: + auth, url = _proxy.netloc.split('@', 1) + self.proxy_auth = HTTPProxyAuth(*auth.split(':', 1)) + r = self.proxy_auth(self) + self.__dict__.update(r.__dict__) + else: + # Check to see if keep_alive is allowed. + if self.config.get('keep_alive'): + conn = self._poolmanager.connection_from_url(url) + else: + conn = connectionpool.connection_from_url(url) + + if url.startswith('https') and self.verify: + + cert_loc = None + + # Allow self-specified cert location. + if self.verify is not True: + cert_loc = self.verify + + + # Look for configuration. + if not cert_loc: + cert_loc = os.environ.get('REQUESTS_CA_BUNDLE') + + # Curl compatiblity. + if not cert_loc: + cert_loc = os.environ.get('CURL_CA_BUNDLE') + + # Use the awesome certifi list. + if not cert_loc: + cert_loc = __import__('certifi').where() + + conn.cert_reqs = 'CERT_REQUIRED' + conn.ca_certs = cert_loc + else: + conn.cert_reqs = 'CERT_NONE' + conn.ca_certs = None + + if not self.sent or anyway: + + if self.cookies: + + # Skip if 'cookie' header is explicitly set. + if 'cookie' not in self.headers: + + # Simple cookie with our dict. + c = oreos.monkeys.SimpleCookie() + for (k, v) in self.cookies.items(): + c[k] = v + + # Turn it into a header. + cookie_header = c.output(header='', sep='; ').strip() + + # Attach Cookie header to request. + self.headers['Cookie'] = cookie_header + + try: + # The inner try .. except re-raises certain exceptions as + # internal exception types; the outer suppresses exceptions + # when safe mode is set. + try: + # Send the request. + r = conn.urlopen( + method=self.method, + url=self.path_url, + body=body, + headers=self.headers, + redirect=False, + assert_same_host=False, + preload_content=False, + decode_content=True, + retries=self.config.get('max_retries', 0), + timeout=self.timeout, + ) + self.sent = True + + except MaxRetryError, e: + raise ConnectionError(e) + + except (_SSLError, _HTTPError), e: + if self.verify and isinstance(e, _SSLError): + raise SSLError(e) + + raise Timeout('Request timed out.') + + except RequestException, e: + if self.config.get('safe_mode', False): + # In safe mode, catch the exception and attach it to + # a blank urllib3.HTTPResponse object. 
+ r = HTTPResponse() + r.error = e + else: + raise + + self._build_response(r) + + # Response manipulation hook. + self.response = dispatch_hook('response', self.hooks, self.response) + + # Post-request hook. + r = dispatch_hook('post_request', self.hooks, self) + self.__dict__.update(r.__dict__) + + # If prefetch is True, mark content as consumed. + if prefetch: + # Save the response. + self.response.content + + if self.config.get('danger_mode'): + self.response.raise_for_status() + + return self.sent + + +class Response(object): + """The core :class:`Response <Response>` object. All + :class:`Request <Request>` objects contain a + :class:`response <Response>` attribute, which is an instance + of this class. + """ + + def __init__(self): + + self._content = None + self._content_consumed = False + + #: Integer Code of responded HTTP Status. + self.status_code = None + + #: Case-insensitive Dictionary of Response Headers. + #: For example, ``headers['content-encoding']`` will return the + #: value of a ``'Content-Encoding'`` response header. + self.headers = CaseInsensitiveDict() + + #: File-like object representation of response (for advanced usage). + self.raw = None + + #: Final URL location of Response. + self.url = None + + #: Resulting :class:`HTTPError` of request, if one occurred. + self.error = None + + #: Encoding to decode with when accessing r.content. + self.encoding = None + + #: A list of :class:`Response <Response>` objects from + #: the history of the Request. Any redirect responses will end + #: up here. + self.history = [] + + #: The :class:`Request <Request>` that created the Response. + self.request = None + + #: A dictionary of Cookies the server sent back. + self.cookies = {} + + #: Dictionary of configurations for this request. + self.config = {} + + + def __repr__(self): + return '<Response [%s]>' % (self.status_code) + + def __nonzero__(self): + """Returns true if :attr:`status_code` is 'OK'.""" + return self.ok + + @property + def ok(self): + try: + self.raise_for_status() + except HTTPError: + return False + return True + + + def iter_content(self, chunk_size=10 * 1024, decode_unicode=False): + """Iterates over the response data. This avoids reading the content + at once into memory for large responses. The chunk size is the number + of bytes it should read into memory. This is not necessarily the + length of each item returned as decoding can take place. + """ + if self._content_consumed: + raise RuntimeError( + 'The content for this response was already consumed' + ) + + def generate(): + while 1: + chunk = self.raw.read(chunk_size) + if not chunk: + break + yield chunk + self._content_consumed = True + + def generate_chunked(): + resp = self.raw._original_response + fp = resp.fp + if resp.chunk_left is not None: + pending_bytes = resp.chunk_left + while pending_bytes: + chunk = fp.read(min(chunk_size, pending_bytes)) + pending_bytes-=len(chunk) + yield chunk + fp.read(2) # throw away crlf + while 1: + #XXX correct line size? 
(httplib has 64kb, seems insane) + pending_bytes = fp.readline(40).strip() + pending_bytes = int(pending_bytes, 16) + if pending_bytes == 0: + break + while pending_bytes: + chunk = fp.read(min(chunk_size, pending_bytes)) + pending_bytes-=len(chunk) + yield chunk + fp.read(2) # throw away crlf + self._content_consumed = True + fp.close() + + + if getattr(getattr(self.raw, '_original_response', None), 'chunked', False): + gen = generate_chunked() + else: + gen = generate() + + if 'gzip' in self.headers.get('content-encoding', ''): + gen = stream_decompress(gen, mode='gzip') + elif 'deflate' in self.headers.get('content-encoding', ''): + gen = stream_decompress(gen, mode='deflate') + + if decode_unicode: + gen = stream_decode_response_unicode(gen, self) + + return gen + + + def iter_lines(self, chunk_size=10 * 1024, decode_unicode=None): + """Iterates over the response data, one line at a time. This + avoids reading the content at once into memory for large + responses. + """ + + #TODO: why rstrip by default + pending = None + + for chunk in self.iter_content(chunk_size, decode_unicode=decode_unicode): + + if pending is not None: + chunk = pending + chunk + lines = chunk.splitlines(True) + + for line in lines[:-1]: + yield line.rstrip() + + # Save the last part of the chunk for next iteration, to keep full line together + # lines may be empty for the last chunk of a chunked response + + if lines: + pending = lines[-1] + #if pending is a complete line, give it baack + if pending[-1] == '\n': + yield pending.rstrip() + pending = None + else: + pending = None + + # Yield the last line + if pending is not None: + yield pending.rstrip() + + + @property + def content(self): + """Content of the response, in bytes.""" + + if self._content is None: + # Read the contents. + try: + if self._content_consumed: + raise RuntimeError( + 'The content for this response was already consumed') + + self._content = self.raw.read() + except AttributeError: + self._content = None + + self._content_consumed = True + return self._content + + + @property + def text(self): + """Content of the response, in unicode. + + if Response.encoding is None and chardet module is available, encoding + will be guessed. + """ + + # Try charset from content-type + content = None + encoding = self.encoding + + # Fallback to auto-detected encoding if chardet is available. + if self.encoding is None: + try: + detected = chardet.detect(self.content) or {} + encoding = detected.get('encoding') + + # Trust that chardet isn't available or something went terribly wrong. + except Exception: + pass + + # Decode unicode from given encoding. 
+        try:
+            content = unicode(self.content, encoding)
+        except (UnicodeError, TypeError):
+            pass
+
+        # Try to fall back:
+        if not content:
+            try:
+                content = unicode(content, encoding, errors='replace')
+            except (UnicodeError, TypeError):
+                pass
+
+        return content
+
+
+    def raise_for_status(self):
+        """Raises stored :class:`HTTPError` or :class:`URLError`, if one occurred."""
+
+        if self.error:
+            raise self.error
+
+        if (self.status_code >= 300) and (self.status_code < 400):
+            raise HTTPError('%s Redirection' % self.status_code)
+
+        elif (self.status_code >= 400) and (self.status_code < 500):
+            raise HTTPError('%s Client Error' % self.status_code)
+
+        elif (self.status_code >= 500) and (self.status_code < 600):
+            raise HTTPError('%s Server Error' % self.status_code)
+
+
diff --git a/requests/packages/__init__.py b/requests/packages/__init__.py
new file mode 100644
index 0000000..d62c4b7
--- /dev/null
+++ b/requests/packages/__init__.py
@@ -0,0 +1,3 @@
+from __future__ import absolute_import
+
+from . import urllib3
diff --git a/requests/packages/oreos/__init__.py b/requests/packages/oreos/__init__.py
new file mode 100644
index 0000000..d01340f
--- /dev/null
+++ b/requests/packages/oreos/__init__.py
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+
+from .core import dict_from_string
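That closes out models.py. A quick sketch of the Response surface it defines, again with a placeholder key and username:

    import requests

    r = requests.get("http://ws.audioscrobbler.com/2.0/",
                     params={"method": "user.getInfo", "user": "someuser",
                             "api_key": "placeholder", "format": "json"})
    print r.status_code              # integer HTTP status
    print r.headers['Content-Type']  # CaseInsensitiveDict, so any casing works
    if r.ok:                         # False whenever raise_for_status() would raise
        body = r.text                # unicode; guessed via chardet if no charset is sent

lastfm.py leans on exactly this surface: it reads r.text and r.status_code and retries on ConnectionError.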
\ No newline at end of file
diff --git a/requests/packages/oreos/core.py b/requests/packages/oreos/core.py
new file mode 100644
index 0000000..359d744
--- /dev/null
+++ b/requests/packages/oreos/core.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+"""
+oreos.core
+~~~~~~~~~~
+
+The creamy white center.
+"""
+
+from .monkeys import SimpleCookie
+
+
+def dict_from_string(s):
+    """Returns a MultiDict with Cookies."""
+
+    cookies = dict()
+
+    c = SimpleCookie()
+    c.load(s)
+
+    for k,v in c.items():
+        cookies.update({k: v.value})
+
+    return cookies
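A round-trip through dict_from_string(), using the same cookie string as the monkeys.py doctest below:

    from requests.packages.oreos import dict_from_string

    print dict_from_string("chips=ahoy; vienna=finger")
    # {'chips': 'ahoy', 'vienna': 'finger'}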
\ No newline at end of file diff --git a/requests/packages/oreos/monkeys.py b/requests/packages/oreos/monkeys.py new file mode 100644 index 0000000..6be3074 --- /dev/null +++ b/requests/packages/oreos/monkeys.py @@ -0,0 +1,770 @@ +# -*- coding: utf-8 -*- + +""" +oreos.monkeys +~~~~~~~~~~~~~ + +Monkeypatches. +""" +#!/usr/bin/env python +# + +#### +# Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu> +# +# All Rights Reserved +# +# Permission to use, copy, modify, and distribute this software +# and its documentation for any purpose and without fee is hereby +# granted, provided that the above copyright notice appear in all +# copies and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Timothy O'Malley not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS +# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR +# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. +# +#### +# +# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp +# by Timothy O'Malley <timo@alum.mit.edu> +# +# Cookie.py is a Python module for the handling of HTTP +# cookies as a Python dictionary. See RFC 2109 for more +# information on cookies. +# +# The original idea to treat Cookies as a dictionary came from +# Dave Mitchell (davem@magnet.com) in 1995, when he released the +# first version of nscookie.py. +# +#### + +r""" +Here's a sample session to show how to use this module. +At the moment, this is the only documentation. + +The Basics +---------- + +Importing is easy.. + + >>> import Cookie + +Most of the time you start by creating a cookie. Cookies come in +three flavors, each with slightly different encoding semantics, but +more on that later. + + >>> C = Cookie.SimpleCookie() + >>> C = Cookie.SerialCookie() + >>> C = Cookie.SmartCookie() + +[Note: Long-time users of Cookie.py will remember using +Cookie.Cookie() to create an Cookie object. Although deprecated, it +is still supported by the code. See the Backward Compatibility notes +for more information.] + +Once you've created your Cookie, you can add values just as if it were +a dictionary. + + >>> C = Cookie.SmartCookie() + >>> C["fig"] = "newton" + >>> C["sugar"] = "wafer" + >>> C.output() + 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' + +Notice that the printable representation of a Cookie is the +appropriate format for a Set-Cookie: header. This is the +default behavior. You can change the header and printed +attributes by using the .output() function + + >>> C = Cookie.SmartCookie() + >>> C["rocky"] = "road" + >>> C["rocky"]["path"] = "/cookie" + >>> print C.output(header="Cookie:") + Cookie: rocky=road; Path=/cookie + >>> print C.output(attrs=[], header="Cookie:") + Cookie: rocky=road + +The load() method of a Cookie extracts cookies from a string. In a +CGI script, you would use this method to extract the cookies from the +HTTP_COOKIE environment variable. 
+ + >>> C = Cookie.SmartCookie() + >>> C.load("chips=ahoy; vienna=finger") + >>> C.output() + 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' + +The load() method is darn-tootin smart about identifying cookies +within a string. Escaped quotation marks, nested semicolons, and other +such trickeries do not confuse it. + + >>> C = Cookie.SmartCookie() + >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') + >>> print C + Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" + +Each element of the Cookie also supports all of the RFC 2109 +Cookie attributes. Here's an example which sets the Path +attribute. + + >>> C = Cookie.SmartCookie() + >>> C["oreo"] = "doublestuff" + >>> C["oreo"]["path"] = "/" + >>> print C + Set-Cookie: oreo=doublestuff; Path=/ + +Each dictionary element has a 'value' attribute, which gives you +back the value associated with the key. + + >>> C = Cookie.SmartCookie() + >>> C["twix"] = "none for you" + >>> C["twix"].value + 'none for you' + + +A Bit More Advanced +------------------- + +As mentioned before, there are three different flavors of Cookie +objects, each with different encoding/decoding semantics. This +section briefly discusses the differences. + +SimpleCookie + +The SimpleCookie expects that all values should be standard strings. +Just to be sure, SimpleCookie invokes the str() builtin to convert +the value to a string, when the values are set dictionary-style. + + >>> C = Cookie.SimpleCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + '7' + >>> C["string"].value + 'seven' + >>> C.output() + 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' + + +SerialCookie + +The SerialCookie expects that all values should be serialized using +cPickle (or pickle, if cPickle isn't available). As a result of +serializing, SerialCookie can save almost any Python object to a +value, and recover the exact same object when the cookie has been +returned. (SerialCookie can yield some strange-looking cookie +values, however.) + + >>> C = Cookie.SerialCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + 7 + >>> C["string"].value + 'seven' + >>> C.output() + 'Set-Cookie: number="I7\\012."\r\nSet-Cookie: string="S\'seven\'\\012p1\\012."' + +Be warned, however, if SerialCookie cannot de-serialize a value (because +it isn't a valid pickle'd object), IT WILL RAISE AN EXCEPTION. + + +SmartCookie + +The SmartCookie combines aspects of each of the other two flavors. +When setting a value in a dictionary-fashion, the SmartCookie will +serialize (ala cPickle) the value *if and only if* it isn't a +Python string. String objects are *not* serialized. Similarly, +when the load() method parses out values, it attempts to de-serialize +the value. If it fails, then it fallsback to treating the value +as a string. + + >>> C = Cookie.SmartCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + 7 + >>> C["string"].value + 'seven' + >>> C.output() + 'Set-Cookie: number="I7\\012."\r\nSet-Cookie: string=seven' + + +Backwards Compatibility +----------------------- + +In order to keep compatibilty with earlier versions of Cookie.py, +it is still possible to use Cookie.Cookie() to create a Cookie. In +fact, this simply returns a SmartCookie. + + >>> C = Cookie.Cookie() + >>> print C.__class__.__name__ + SmartCookie + + +Finis. 
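+
+P.S. Cookie attributes such as Max-Age go through the same dictionary
+interface (a brief sketch; the cookie name and value are illustrative):
+
+   >>> C = Cookie.SimpleCookie()
+   >>> C["ration"] = "biscuit"
+   >>> C["ration"]["max-age"] = 3600
+   >>> print C
+   Set-Cookie: ration=biscuit; Max-Age=3600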
+""" #" +# ^ +# |----helps out font-lock + +# +# Import our required modules +# +import string + +try: + from cPickle import dumps, loads +except ImportError: + from pickle import dumps, loads + +import re, warnings + +__all__ = ["CookieError","BaseCookie","SimpleCookie","SerialCookie", + "SmartCookie","Cookie"] + +_nulljoin = ''.join +_semispacejoin = '; '.join +_spacejoin = ' '.join + +# +# Define an exception visible to External modules +# +class CookieError(Exception): + pass + + +# These quoting routines conform to the RFC2109 specification, which in +# turn references the character definitions from RFC2068. They provide +# a two-way quoting algorithm. Any non-text character is translated +# into a 4 character sequence: a forward-slash followed by the +# three-digit octal equivalent of the character. Any '\' or '"' is +# quoted with a preceeding '\' slash. +# +# These are taken from RFC2068 and RFC2109. +# _LegalChars is the list of chars which don't require "'s +# _Translator hash-table for fast quoting +# +_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~[]_" +_Translator = { + '\000' : '\\000', '\001' : '\\001', '\002' : '\\002', + '\003' : '\\003', '\004' : '\\004', '\005' : '\\005', + '\006' : '\\006', '\007' : '\\007', '\010' : '\\010', + '\011' : '\\011', '\012' : '\\012', '\013' : '\\013', + '\014' : '\\014', '\015' : '\\015', '\016' : '\\016', + '\017' : '\\017', '\020' : '\\020', '\021' : '\\021', + '\022' : '\\022', '\023' : '\\023', '\024' : '\\024', + '\025' : '\\025', '\026' : '\\026', '\027' : '\\027', + '\030' : '\\030', '\031' : '\\031', '\032' : '\\032', + '\033' : '\\033', '\034' : '\\034', '\035' : '\\035', + '\036' : '\\036', '\037' : '\\037', + + # Because of the way browsers really handle cookies (as opposed + # to what the RFC says) we also encode , and ; + + ',' : '\\054', ';' : '\\073', + + '"' : '\\"', '\\' : '\\\\', + + '\177' : '\\177', '\200' : '\\200', '\201' : '\\201', + '\202' : '\\202', '\203' : '\\203', '\204' : '\\204', + '\205' : '\\205', '\206' : '\\206', '\207' : '\\207', + '\210' : '\\210', '\211' : '\\211', '\212' : '\\212', + '\213' : '\\213', '\214' : '\\214', '\215' : '\\215', + '\216' : '\\216', '\217' : '\\217', '\220' : '\\220', + '\221' : '\\221', '\222' : '\\222', '\223' : '\\223', + '\224' : '\\224', '\225' : '\\225', '\226' : '\\226', + '\227' : '\\227', '\230' : '\\230', '\231' : '\\231', + '\232' : '\\232', '\233' : '\\233', '\234' : '\\234', + '\235' : '\\235', '\236' : '\\236', '\237' : '\\237', + '\240' : '\\240', '\241' : '\\241', '\242' : '\\242', + '\243' : '\\243', '\244' : '\\244', '\245' : '\\245', + '\246' : '\\246', '\247' : '\\247', '\250' : '\\250', + '\251' : '\\251', '\252' : '\\252', '\253' : '\\253', + '\254' : '\\254', '\255' : '\\255', '\256' : '\\256', + '\257' : '\\257', '\260' : '\\260', '\261' : '\\261', + '\262' : '\\262', '\263' : '\\263', '\264' : '\\264', + '\265' : '\\265', '\266' : '\\266', '\267' : '\\267', + '\270' : '\\270', '\271' : '\\271', '\272' : '\\272', + '\273' : '\\273', '\274' : '\\274', '\275' : '\\275', + '\276' : '\\276', '\277' : '\\277', '\300' : '\\300', + '\301' : '\\301', '\302' : '\\302', '\303' : '\\303', + '\304' : '\\304', '\305' : '\\305', '\306' : '\\306', + '\307' : '\\307', '\310' : '\\310', '\311' : '\\311', + '\312' : '\\312', '\313' : '\\313', '\314' : '\\314', + '\315' : '\\315', '\316' : '\\316', '\317' : '\\317', + '\320' : '\\320', '\321' : '\\321', '\322' : '\\322', + '\323' : '\\323', '\324' : '\\324', '\325' : '\\325', + '\326' : '\\326', '\327' 
: '\\327', '\330' : '\\330', + '\331' : '\\331', '\332' : '\\332', '\333' : '\\333', + '\334' : '\\334', '\335' : '\\335', '\336' : '\\336', + '\337' : '\\337', '\340' : '\\340', '\341' : '\\341', + '\342' : '\\342', '\343' : '\\343', '\344' : '\\344', + '\345' : '\\345', '\346' : '\\346', '\347' : '\\347', + '\350' : '\\350', '\351' : '\\351', '\352' : '\\352', + '\353' : '\\353', '\354' : '\\354', '\355' : '\\355', + '\356' : '\\356', '\357' : '\\357', '\360' : '\\360', + '\361' : '\\361', '\362' : '\\362', '\363' : '\\363', + '\364' : '\\364', '\365' : '\\365', '\366' : '\\366', + '\367' : '\\367', '\370' : '\\370', '\371' : '\\371', + '\372' : '\\372', '\373' : '\\373', '\374' : '\\374', + '\375' : '\\375', '\376' : '\\376', '\377' : '\\377' + } + +_idmap = ''.join(chr(x) for x in xrange(256)) + +def _quote(str, LegalChars=_LegalChars, + idmap=_idmap, translate=string.translate): + # + # If the string does not need to be double-quoted, + # then just return the string. Otherwise, surround + # the string in doublequotes and precede quote (with a \) + # special characters. + # + if "" == translate(str, idmap, LegalChars): + return str + else: + return '"' + _nulljoin( map(_Translator.get, str, str) ) + '"' +# end _quote + + +_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") +_QuotePatt = re.compile(r"[\\].") + +def _unquote(str): + # If there aren't any doublequotes, + # then there can't be any special characters. See RFC 2109. + if len(str) < 2: + return str + if str[0] != '"' or str[-1] != '"': + return str + + # We have to assume that we must decode this string. + # Down to work. + + # Remove the "s + str = str[1:-1] + + # Check for special sequences. Examples: + # \012 --> \n + # \" --> " + # + i = 0 + n = len(str) + res = [] + while 0 <= i < n: + Omatch = _OctalPatt.search(str, i) + Qmatch = _QuotePatt.search(str, i) + if not Omatch and not Qmatch: # Neither matched + res.append(str[i:]) + break + # else: + j = k = -1 + if Omatch: j = Omatch.start(0) + if Qmatch: k = Qmatch.start(0) + if Qmatch and ( not Omatch or k < j ): # QuotePatt matched + res.append(str[i:k]) + res.append(str[k+1]) + i = k+2 + else: # OctalPatt matched + res.append(str[i:j]) + res.append( chr( int(str[j+1:j+4], 8) ) ) + i = j+4 + return _nulljoin(res) +# end _unquote + +# The _getdate() routine is used to set the expiration time in +# the cookie's HTTP header. By default, _getdate() returns the +# current time in the appropriate "expires" format for a +# Set-Cookie header. The one optional argument is an offset from +# now, in seconds. For example, an offset of -3600 means "one hour ago". +# The offset may be a floating point number. +# + +_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + +_monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + +def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname): + from time import gmtime, time + now = time() + year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future) + return "%s, %02d-%3s-%4d %02d:%02d:%02d GMT" % \ + (weekdayname[wd], day, monthname[month], year, hh, mm, ss) + + +# +# A class to hold ONE key,value pair. +# In a cookie, each such pair may have several attributes. +# so this class is used to keep the attributes associated +# with the appropriate key,value pair. +# This class also includes a coded_value attribute, which +# is used to hold the network representation of the +# value. This is most useful when Python objects are +# pickled for network transit. 
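+#
+# A sketch of that interface (key, value, and path are illustrative):
+#
+#     >>> m = Morsel()
+#     >>> m.set("oatmeal", "raisin", "raisin")
+#     >>> m["path"] = "/snacks"
+#     >>> m.output()
+#     'Set-Cookie: oatmeal=raisin; Path=/snacks'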
+# + +class Morsel(dict): + # RFC 2109 lists these attributes as reserved: + # path comment domain + # max-age secure version + # + # For historical reasons, these attributes are also reserved: + # expires + # + # This is an extension from Microsoft: + # httponly + # + # This dictionary provides a mapping from the lowercase + # variant on the left to the appropriate traditional + # formatting on the right. + _reserved = { "expires" : "expires", + "path" : "Path", + "comment" : "Comment", + "domain" : "Domain", + "max-age" : "Max-Age", + "secure" : "secure", + "httponly" : "httponly", + "version" : "Version", + } + + def __init__(self): + # Set defaults + self.key = self.value = self.coded_value = None + + # Set default attributes + for K in self._reserved: + dict.__setitem__(self, K, "") + # end __init__ + + def __setitem__(self, K, V): + K = K.lower() + if not K in self._reserved: + raise CookieError("Invalid Attribute %s" % K) + dict.__setitem__(self, K, V) + # end __setitem__ + + def isReservedKey(self, K): + return K.lower() in self._reserved + # end isReservedKey + + def set(self, key, val, coded_val, + LegalChars=_LegalChars, + idmap=_idmap, translate=string.translate): + # First we verify that the key isn't a reserved word + # Second we make sure it only contains legal characters + if key.lower() in self._reserved: + raise CookieError("Attempt to set a reserved key: %s" % key) + if "" != translate(key, idmap, LegalChars): + raise CookieError("Illegal key value: %s" % key) + + # It's a good key, so save it. + self.key = key + self.value = val + self.coded_value = coded_val + # end set + + def output(self, attrs=None, header = "Set-Cookie:"): + return "%s %s" % ( header, self.OutputString(attrs) ) + + __str__ = output + + def __repr__(self): + return '<%s: %s=%s>' % (self.__class__.__name__, + self.key, repr(self.value) ) + + def js_output(self, attrs=None): + # Print javascript + return """ + <script type="text/javascript"> + <!-- begin hiding + document.cookie = \"%s\"; + // end hiding --> + </script> + """ % ( self.OutputString(attrs).replace('"',r'\"'), ) + # end js_output() + + def OutputString(self, attrs=None): + # Build up our result + # + result = [] + RA = result.append + + # First, the key=value pair + RA("%s=%s" % (self.key, self.coded_value)) + + # Now add any defined attributes + if attrs is None: + attrs = self._reserved + items = self.items() + items.sort() + for K,V in items: + if V == "": continue + if K not in attrs: continue + if K == "expires" and type(V) == type(1): + RA("%s=%s" % (self._reserved[K], _getdate(V))) + elif K == "max-age" and type(V) == type(1): + RA("%s=%d" % (self._reserved[K], V)) + elif K == "secure": + RA(str(self._reserved[K])) + elif K == "httponly": + RA(str(self._reserved[K])) + else: + RA("%s=%s" % (self._reserved[K], V)) + + # Return the result + return _semispacejoin(result) + # end OutputString +# end Morsel class + + + +# +# Pattern for finding cookie +# +# This used to be strict parsing based on the RFC2109 and RFC2068 +# specifications. I have since discovered that MSIE 3.0x doesn't +# follow the character rules outlined in those specs. As a +# result, the parsing rules here are less strict. +# + +_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]\_]" + +_CookiePattern = re.compile( + r"(?x)" # This is a Verbose pattern + r"(?P<key>" # Start of group 'key' + ""+ _LegalCharsPatt +"+?" 
# Any word of at least one letter, nongreedy + r")" # End of group 'key' + r"\s*=\s*" # Equal Sign + r"(?P<val>" # Start of group 'val' + r'"(?:[^\\"]|\\.)*"' # Any doublequoted string + r"|" # or + r"\w{3},\s[\w\d-]{9,11}\s[\d:]{8}\sGMT" # Special case for "expires" attr + r"|" # or + ""+ _LegalCharsPatt +"*" # Any word or empty string + r")" # End of group 'val' + r"\s*;?" # Probably ending in a semi-colon + ) + + +# At long last, here is the cookie class. +# Using this class is almost just like using a dictionary. +# See this module's docstring for example usage. +# +class BaseCookie(dict): + # A container class for a set of Morsels + # + + def value_decode(self, val): + """real_value, coded_value = value_decode(STRING) + Called prior to setting a cookie's value from the network + representation. The VALUE is the value read from HTTP + header. + Override this function to modify the behavior of cookies. + """ + return val, val + # end value_encode + + def value_encode(self, val): + """real_value, coded_value = value_encode(VALUE) + Called prior to setting a cookie's value from the dictionary + representation. The VALUE is the value being assigned. + Override this function to modify the behavior of cookies. + """ + strval = str(val) + return strval, strval + # end value_encode + + def __init__(self, input=None): + if input: self.load(input) + # end __init__ + + def __set(self, key, real_value, coded_value): + """Private method for setting a cookie's value""" + M = self.get(key, Morsel()) + M.set(key, real_value, coded_value) + dict.__setitem__(self, key, M) + # end __set + + def __setitem__(self, key, value): + """Dictionary style assignment.""" + rval, cval = self.value_encode(value) + self.__set(key, rval, cval) + # end __setitem__ + + def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): + """Return a string suitable for HTTP.""" + result = [] + items = self.items() + items.sort() + for K,V in items: + result.append( V.output(attrs, header) ) + return sep.join(result) + # end output + + __str__ = output + + def __repr__(self): + L = [] + items = self.items() + items.sort() + for K,V in items: + L.append( '%s=%s' % (K,repr(V.value) ) ) + return '<%s: %s>' % (self.__class__.__name__, _spacejoin(L)) + + def js_output(self, attrs=None): + """Return a string suitable for JavaScript.""" + result = [] + items = self.items() + items.sort() + for K,V in items: + result.append( V.js_output(attrs) ) + return _nulljoin(result) + # end js_output + + def load(self, rawdata): + """Load cookies from a string (presumably HTTP_COOKIE) or + from a dictionary. Loading cookies from a dictionary 'd' + is equivalent to calling: + map(Cookie.__setitem__, d.keys(), d.values()) + """ + if type(rawdata) == type(""): + self.__ParseString(rawdata) + else: + # self.update() wouldn't call our custom __setitem__ + for k, v in rawdata.items(): + self[k] = v + return + # end load() + + def __ParseString(self, str, patt=_CookiePattern): + i = 0 # Our starting point + n = len(str) # Length of string + M = None # current morsel + + while 0 <= i < n: + # Start looking for a cookie + match = patt.search(str, i) + if not match: break # No more cookies + + K,V = match.group("key"), match.group("val") + i = match.end(0) + + # Parse the key, value in case it's metainfo + if K[0] == "$": + # We ignore attributes which pertain to the cookie + # mechanism as a whole. See RFC 2109. + # (Does anyone care?) 
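+                # (A $-attribute seen before any cookie -- e.g. a leading
+                # $Version -- arrives while M is None and is dropped; later
+                # ones are attached to the current morsel.)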
+ if M: + M[ K[1:] ] = V + elif K.lower() in Morsel._reserved: + if M: + M[ K ] = _unquote(V) + else: + rval, cval = self.value_decode(V) + self.__set(K, rval, cval) + M = self[K] + # end __ParseString +# end BaseCookie class + +class SimpleCookie(BaseCookie): + """SimpleCookie + SimpleCookie supports strings as cookie values. When setting + the value using the dictionary assignment notation, SimpleCookie + calls the builtin str() to convert the value to a string. Values + received from HTTP are kept as strings. + """ + def value_decode(self, val): + return _unquote( val ), val + def value_encode(self, val): + strval = str(val) + return strval, _quote( strval ) +# end SimpleCookie + +class SerialCookie(BaseCookie): + """SerialCookie + SerialCookie supports arbitrary objects as cookie values. All + values are serialized (using cPickle) before being sent to the + client. All incoming values are assumed to be valid Pickle + representations. IF AN INCOMING VALUE IS NOT IN A VALID PICKLE + FORMAT, THEN AN EXCEPTION WILL BE RAISED. + + Note: Large cookie values add overhead because they must be + retransmitted on every HTTP transaction. + + Note: HTTP has a 2k limit on the size of a cookie. This class + does not check for this limit, so be careful!!! + """ + def __init__(self, input=None): + warnings.warn("SerialCookie class is insecure; do not use it", + DeprecationWarning) + BaseCookie.__init__(self, input) + # end __init__ + def value_decode(self, val): + # This could raise an exception! + return loads( _unquote(val) ), val + def value_encode(self, val): + return val, _quote( dumps(val) ) +# end SerialCookie + +class SmartCookie(BaseCookie): + """SmartCookie + SmartCookie supports arbitrary objects as cookie values. If the + object is a string, then it is quoted. If the object is not a + string, however, then SmartCookie will use cPickle to serialize + the object into a string representation. + + Note: Large cookie values add overhead because they must be + retransmitted on every HTTP transaction. + + Note: HTTP has a 2k limit on the size of a cookie. This class + does not check for this limit, so be careful!!! + """ + def __init__(self, input=None): + warnings.warn("Cookie/SmartCookie class is insecure; do not use it", + DeprecationWarning) + BaseCookie.__init__(self, input) + # end __init__ + def value_decode(self, val): + strval = _unquote(val) + try: + return loads(strval), val + except: + return strval, val + def value_encode(self, val): + if type(val) == type(""): + return val, _quote(val) + else: + return val, _quote( dumps(val) ) +# end SmartCookie + + +########################################################### +# Backwards Compatibility: Don't break any existing code! + +# We provide Cookie() as an alias for SmartCookie() +Cookie = SmartCookie + +# +########################################################### + +def _test(): + import doctest, Cookie + return doctest.testmod(Cookie) + +if __name__ == "__main__": + _test() + + +#Local Variables: +#tab-width: 4 +#end: diff --git a/requests/packages/oreos/structures.py b/requests/packages/oreos/structures.py new file mode 100644 index 0000000..063d5f9 --- /dev/null +++ b/requests/packages/oreos/structures.py @@ -0,0 +1,399 @@ +# -*- coding: utf-8 -*- + +""" +oreos.sructures +~~~~~~~~~~~~~~~ + +The plastic blue packaging. + +This is mostly directly stolen from mitsuhiko/werkzeug. 
+""" + +__all__ = ('MultiDict',) + +class _Missing(object): + + def __repr__(self): + return 'no value' + + def __reduce__(self): + return '_missing' + +_missing = _Missing() + + + +def iter_multi_items(mapping): + """Iterates over the items of a mapping yielding keys and values + without dropping any from more complex structures. + """ + if isinstance(mapping, MultiDict): + for item in mapping.iteritems(multi=True): + yield item + elif isinstance(mapping, dict): + for key, value in mapping.iteritems(): + if isinstance(value, (tuple, list)): + for value in value: + yield key, value + else: + yield key, value + else: + for item in mapping: + yield item + + + +class TypeConversionDict(dict): + """Works like a regular dict but the :meth:`get` method can perform + type conversions. :class:`MultiDict` and :class:`CombinedMultiDict` + are subclasses of this class and provide the same feature. + + .. versionadded:: 0.5 + """ + + def get(self, key, default=None, type=None): + """Return the default value if the requested data doesn't exist. + If `type` is provided and is a callable it should convert the value, + return it or raise a :exc:`ValueError` if that is not possible. In + this case the function will return the default as if the value was not + found: + + >>> d = TypeConversionDict(foo='42', bar='blub') + >>> d.get('foo', type=int) + 42 + >>> d.get('bar', -1, type=int) + -1 + + :param key: The key to be looked up. + :param default: The default value to be returned if the key can't + be looked up. If not further specified `None` is + returned. + :param type: A callable that is used to cast the value in the + :class:`MultiDict`. If a :exc:`ValueError` is raised + by this callable the default value is returned. + """ + try: + rv = self[key] + if type is not None: + rv = type(rv) + except (KeyError, ValueError): + rv = default + return rv + + +class MultiDict(TypeConversionDict): + """A :class:`MultiDict` is a dictionary subclass customized to deal with + multiple values for the same key which is for example used by the parsing + functions in the wrappers. This is necessary because some HTML form + elements pass multiple values for the same key. + + :class:`MultiDict` implements all standard dictionary methods. + Internally, it saves all values for a key as a list, but the standard dict + access methods will only return the first value for a key. If you want to + gain access to the other values, too, you have to use the `list` methods as + explained below. + + Basic Usage: + + >>> d = MultiDict([('a', 'b'), ('a', 'c')]) + >>> d + MultiDict([('a', 'b'), ('a', 'c')]) + >>> d['a'] + 'b' + >>> d.getlist('a') + ['b', 'c'] + >>> 'a' in d + True + + It behaves like a normal dict thus all dict functions will only return the + first value when multiple values for one key are found. + + From Werkzeug 0.3 onwards, the `KeyError` raised by this class is also a + subclass of the :exc:`~exceptions.BadRequest` HTTP exception and will + render a page for a ``400 BAD REQUEST`` if caught in a catch-all for HTTP + exceptions. + + A :class:`MultiDict` can be constructed from an iterable of + ``(key, value)`` tuples, a dict, a :class:`MultiDict` or from Werkzeug 0.2 + onwards some keyword parameters. + + :param mapping: the initial value for the :class:`MultiDict`. Either a + regular dict, an iterable of ``(key, value)`` tuples + or `None`. 
+ """ + + def __init__(self, mapping=None): + if isinstance(mapping, MultiDict): + dict.__init__(self, ((k, l[:]) for k, l in mapping.iterlists())) + elif isinstance(mapping, dict): + tmp = {} + for key, value in mapping.iteritems(): + if isinstance(value, (tuple, list)): + value = list(value) + else: + value = [value] + tmp[key] = value + dict.__init__(self, tmp) + else: + tmp = {} + for key, value in mapping or (): + tmp.setdefault(key, []).append(value) + dict.__init__(self, tmp) + + def __getstate__(self): + return dict(self.lists()) + + def __setstate__(self, value): + dict.clear(self) + dict.update(self, value) + + def __iter__(self): + return self.iterkeys() + + def __getitem__(self, key): + """Return the first data value for this key; + raises KeyError if not found. + + :param key: The key to be looked up. + :raise KeyError: if the key does not exist. + """ + if key in self: + return dict.__getitem__(self, key)[0] + raise KeyError(key) + + def __setitem__(self, key, value): + """Like :meth:`add` but removes an existing key first. + + :param key: the key for the value. + :param value: the value to set. + """ + dict.__setitem__(self, key, [value]) + + def add(self, key, value): + """Adds a new value for the key. + + .. versionadded:: 0.6 + + :param key: the key for the value. + :param value: the value to add. + """ + dict.setdefault(self, key, []).append(value) + + def getlist(self, key, type=None): + """Return the list of items for a given key. If that key is not in the + `MultiDict`, the return value will be an empty list. Just as `get` + `getlist` accepts a `type` parameter. All items will be converted + with the callable defined there. + + :param key: The key to be looked up. + :param type: A callable that is used to cast the value in the + :class:`MultiDict`. If a :exc:`ValueError` is raised + by this callable the value will be removed from the list. + :return: a :class:`list` of all the values for the key. + """ + try: + rv = dict.__getitem__(self, key) + except KeyError: + return [] + if type is None: + return list(rv) + result = [] + for item in rv: + try: + result.append(type(item)) + except ValueError: + pass + return result + + def setlist(self, key, new_list): + """Remove the old values for a key and add new ones. Note that the list + you pass the values in will be shallow-copied before it is inserted in + the dictionary. + + >>> d = MultiDict() + >>> d.setlist('foo', ['1', '2']) + >>> d['foo'] + '1' + >>> d.getlist('foo') + ['1', '2'] + + :param key: The key for which the values are set. + :param new_list: An iterable with the new values for the key. Old values + are removed first. + """ + dict.__setitem__(self, key, list(new_list)) + + def setdefault(self, key, default=None): + """Returns the value for the key if it is in the dict, otherwise it + returns `default` and sets that value for `key`. + + :param key: The key to be looked up. + :param default: The default value to be returned if the key is not + in the dict. If not further specified it's `None`. + """ + if key not in self: + self[key] = default + else: + default = self[key] + return default + + def setlistdefault(self, key, default_list=None): + """Like `setdefault` but sets multiple values. The list returned + is not a copy, but the list that is actually used internally. This + means that you can put new values into the dict by appending items + to the list: + + >>> d = MultiDict({"foo": 1}) + >>> d.setlistdefault("foo").extend([2, 3]) + >>> d.getlist("foo") + [1, 2, 3] + + :param key: The key to be looked up. 
+ :param default: An iterable of default values. It is either copied + (in case it was a list) or converted into a list + before returned. + :return: a :class:`list` + """ + if key not in self: + default_list = list(default_list or ()) + dict.__setitem__(self, key, default_list) + else: + default_list = dict.__getitem__(self, key) + return default_list + + def items(self, multi=False): + """Return a list of ``(key, value)`` pairs. + + :param multi: If set to `True` the list returned will have a + pair for each value of each key. Otherwise it + will only contain pairs for the first value of + each key. + + :return: a :class:`list` + """ + return list(self.iteritems(multi)) + + def lists(self): + """Return a list of ``(key, values)`` pairs, where values is the list of + all values associated with the key. + + :return: a :class:`list` + """ + return list(self.iterlists()) + + def values(self): + """Returns a list of the first value on every key's value list. + + :return: a :class:`list`. + """ + return [self[key] for key in self.iterkeys()] + + def listvalues(self): + """Return a list of all values associated with a key. Zipping + :meth:`keys` and this is the same as calling :meth:`lists`: + + >>> d = MultiDict({"foo": [1, 2, 3]}) + >>> zip(d.keys(), d.listvalues()) == d.lists() + True + + :return: a :class:`list` + """ + return list(self.iterlistvalues()) + + def iteritems(self, multi=False): + """Like :meth:`items` but returns an iterator.""" + for key, values in dict.iteritems(self): + if multi: + for value in values: + yield key, value + else: + yield key, values[0] + + def iterlists(self): + """Like :meth:`items` but returns an iterator.""" + for key, values in dict.iteritems(self): + yield key, list(values) + + def itervalues(self): + """Like :meth:`values` but returns an iterator.""" + for values in dict.itervalues(self): + yield values[0] + + def iterlistvalues(self): + """Like :meth:`listvalues` but returns an iterator.""" + return dict.itervalues(self) + + def copy(self): + """Return a shallow copy of this object.""" + return self.__class__(self) + + def to_dict(self, flat=True): + """Return the contents as regular dict. If `flat` is `True` the + returned dict will only have the first item present, if `flat` is + `False` all values will be returned as lists. + + :param flat: If set to `False` the dict returned will have lists + with all the values in it. Otherwise it will only + contain the first value for each key. + :return: a :class:`dict` + """ + if flat: + return dict(self.iteritems()) + return dict(self.lists()) + + def update(self, other_dict): + """update() extends rather than replaces existing key lists.""" + for key, value in iter_multi_items(other_dict): + MultiDict.add(self, key, value) + + def pop(self, key, default=_missing): + """Pop the first item for a list on the dict. Afterwards the + key is removed from the dict, so additional values are discarded: + + >>> d = MultiDict({"foo": [1, 2, 3]}) + >>> d.pop("foo") + 1 + >>> "foo" in d + False + + :param key: the key to pop. + :param default: if provided the value to return if the key was + not in the dictionary. + """ + try: + return dict.pop(self, key)[0] + except KeyError, e: + if default is not _missing: + return default + raise KeyError(str(e)) + + def popitem(self): + """Pop an item from the dict.""" + try: + item = dict.popitem(self) + return (item[0], item[1][0]) + except KeyError, e: + raise KeyError(str(e)) + + def poplist(self, key): + """Pop the list for a key from the dict. 
If the key is not in the dict + an empty list is returned. + + .. versionchanged:: 0.5 + If the key does no longer exist a list is returned instead of + raising an error. + """ + return dict.pop(self, key, []) + + def popitemlist(self): + """Pop a ``(key, list)`` tuple from the dict.""" + try: + return dict.popitem(self) + except KeyError, e: + raise KeyError(str(e)) + + def __copy__(self): + return self.copy() + + def __repr__(self): + return '%s(%r)' % (self.__class__.__name__, self.items(multi=True)) diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py new file mode 100644 index 0000000..20b1fb4 --- /dev/null +++ b/requests/packages/urllib3/__init__.py @@ -0,0 +1,48 @@ +# urllib3/__init__.py +# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +""" +urllib3 - Thread-safe connection pooling and re-using. +""" + +__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' +__license__ = 'MIT' +__version__ = '1.0.2' + + +from .connectionpool import ( + HTTPConnectionPool, + HTTPSConnectionPool, + connection_from_url, + get_host, + make_headers) + + +from .exceptions import ( + HTTPError, + MaxRetryError, + SSLError, + TimeoutError) + +from .poolmanager import PoolManager, ProxyManager, proxy_from_url +from .response import HTTPResponse +from .filepost import encode_multipart_formdata + + +# Set default logging handler to avoid "No handler found" warnings. +import logging +try: + from logging import NullHandler +except ImportError: + class NullHandler(logging.Handler): + def emit(self, record): + pass + +logging.getLogger(__name__).addHandler(NullHandler()) + +# ... Clean up. +del logging +del NullHandler diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py new file mode 100644 index 0000000..00b2cd5 --- /dev/null +++ b/requests/packages/urllib3/_collections.py @@ -0,0 +1,131 @@ +# urllib3/_collections.py +# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +from collections import deque + +from threading import RLock + +__all__ = ['RecentlyUsedContainer'] + + +class AccessEntry(object): + __slots__ = ('key', 'is_valid') + + def __init__(self, key, is_valid=True): + self.key = key + self.is_valid = is_valid + + +class RecentlyUsedContainer(dict): + """ + Provides a dict-like that maintains up to ``maxsize`` keys while throwing + away the least-recently-used keys beyond ``maxsize``. + """ + + # If len(self.access_log) exceeds self._maxsize * CLEANUP_FACTOR, then we + # will attempt to cleanup the invalidated entries in the access_log + # datastructure during the next 'get' operation. + CLEANUP_FACTOR = 10 + + def __init__(self, maxsize=10): + self._maxsize = maxsize + + self._container = {} + + # We use a deque to to store our keys ordered by the last access. + self.access_log = deque() + self.access_log_lock = RLock() + + # We look up the access log entry by the key to invalidate it so we can + # insert a new authorative entry at the head without having to dig and + # find the old entry for removal immediately. 
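+        # (access_lookup maps each key to its most recent AccessEntry.)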
+ self.access_lookup = {} + + # Trigger a heap cleanup when we get past this size + self.access_log_limit = maxsize * self.CLEANUP_FACTOR + + def _invalidate_entry(self, key): + "If exists: Invalidate old entry and return it." + old_entry = self.access_lookup.get(key) + if old_entry: + old_entry.is_valid = False + + return old_entry + + def _push_entry(self, key): + "Push entry onto our access log, invalidate the old entry if exists." + self._invalidate_entry(key) + + new_entry = AccessEntry(key) + self.access_lookup[key] = new_entry + + self.access_log_lock.acquire() + self.access_log.appendleft(new_entry) + self.access_log_lock.release() + + def _prune_entries(self, num): + "Pop entries from our access log until we popped ``num`` valid ones." + while num > 0: + self.access_log_lock.acquire() + p = self.access_log.pop() + self.access_log_lock.release() + + if not p.is_valid: + continue # Invalidated entry, skip + + dict.pop(self, p.key, None) + self.access_lookup.pop(p.key, None) + num -= 1 + + def _prune_invalidated_entries(self): + "Rebuild our access_log without the invalidated entries." + self.access_log_lock.acquire() + self.access_log = deque(e for e in self.access_log if e.is_valid) + self.access_log_lock.release() + + def _get_ordered_access_keys(self): + "Return ordered access keys for inspection. Used for testing." + self.access_log_lock.acquire() + r = [e.key for e in self.access_log if e.is_valid] + self.access_log_lock.release() + + return r + + def __getitem__(self, key): + item = dict.get(self, key) + + if not item: + raise KeyError(key) + + # Insert new entry with new high priority, also implicitly invalidates + # the old entry. + self._push_entry(key) + + if len(self.access_log) > self.access_log_limit: + # Heap is getting too big, try to clean up any tailing invalidated + # entries. 
+ self._prune_invalidated_entries() + + return item + + def __setitem__(self, key, item): + # Add item to our container and access log + dict.__setitem__(self, key, item) + self._push_entry(key) + + # Discard invalid and excess entries + self._prune_entries(len(self) - self._maxsize) + + def __delitem__(self, key): + self._invalidate_entry(key) + self.access_lookup.pop(key, None) + dict.__delitem__(self, key) + + def get(self, key, default=None): + try: + return self[key] + except KeyError: + return default diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py new file mode 100644 index 0000000..17f2f84 --- /dev/null +++ b/requests/packages/urllib3/connectionpool.py @@ -0,0 +1,548 @@ +# urllib3/connectionpool.py +# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import logging +import socket + + +from httplib import HTTPConnection, HTTPSConnection, HTTPException +from Queue import Queue, Empty, Full +from select import select +from socket import error as SocketError, timeout as SocketTimeout + +from .packages.ssl_match_hostname import match_hostname, CertificateError + +try: + import ssl + BaseSSLError = ssl.SSLError +except ImportError: + ssl = None + BaseSSLError = None + + +from .request import RequestMethods +from .response import HTTPResponse +from .exceptions import ( + SSLError, + MaxRetryError, + TimeoutError, + HostChangedError, + EmptyPoolError, +) + + +log = logging.getLogger(__name__) + +_Default = object() + + +## Connection objects (extension of httplib) + +class VerifiedHTTPSConnection(HTTPSConnection): + """ + Based on httplib.HTTPSConnection but wraps the socket with + SSL certification. + """ + cert_reqs = None + ca_certs = None + + def set_cert(self, key_file=None, cert_file=None, + cert_reqs='CERT_NONE', ca_certs=None): + ssl_req_scheme = { + 'CERT_NONE': ssl.CERT_NONE, + 'CERT_OPTIONAL': ssl.CERT_OPTIONAL, + 'CERT_REQUIRED': ssl.CERT_REQUIRED + } + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE + self.ca_certs = ca_certs + + def connect(self): + # Add certificate verification + sock = socket.create_connection((self.host, self.port), self.timeout) + + # Wrap socket using verification with the root certs in + # trusted_root_certs + self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs) + if self.ca_certs: + match_hostname(self.sock.getpeercert(), self.host) + +## Pool objects + +class ConnectionPool(object): + """ + Base class for all connection pools, such as + :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`. + """ + pass + + +class HTTPConnectionPool(ConnectionPool, RequestMethods): + """ + Thread-safe connection pool for one host. + + :param host: + Host used for this HTTP Connection (e.g. "localhost"), passed into + :class:`httplib.HTTPConnection`. + + :param port: + Port used for this HTTP Connection (None is equivalent to 80), passed + into :class:`httplib.HTTPConnection`. + + :param strict: + Causes BadStatusLine to be raised if the status line can't be parsed + as a valid HTTP/1.0 or 1.1 status line, passed into + :class:`httplib.HTTPConnection`. + + :param timeout: + Socket timeout for each individual connection, can be a float. None + disables timeout. 
+ + :param maxsize: + Number of connections to save that can be reused. More than 1 is useful + in multithreaded situations. If ``block`` is set to false, more + connections will be created but they will not be saved once they've + been used. + + :param block: + If set to True, no more than ``maxsize`` connections will be used at + a time. When no free connections are available, the call will block + until a connection has been released. This is a useful side effect for + particular multithreaded situations where one does not want to use more + than maxsize connections per host to prevent flooding. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + """ + + scheme = 'http' + + def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, + block=False, headers=None): + self.host = host + self.port = port + self.strict = strict + self.timeout = timeout + self.pool = Queue(maxsize) + self.block = block + self.headers = headers or {} + + # Fill the queue up so that doing get() on it will block properly + for _ in xrange(maxsize): + self.pool.put(None) + + # These are mostly for testing and debugging purposes. + self.num_connections = 0 + self.num_requests = 0 + + def _new_conn(self): + """ + Return a fresh :class:`httplib.HTTPConnection`. + """ + self.num_connections += 1 + log.info("Starting new HTTP connection (%d): %s" % + (self.num_connections, self.host)) + return HTTPConnection(host=self.host, port=self.port) + + def _get_conn(self, timeout=None): + """ + Get a connection. Will return a pooled connection if one is available. + + If no connections are available and :prop:`.block` is ``False``, then a + fresh connection is returned. + + :param timeout: + Seconds to wait before giving up and raising + :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and + :prop:`.block` is ``True``. + """ + conn = None + try: + conn = self.pool.get(block=self.block, timeout=timeout) + + # If this is a persistent connection, check if it got disconnected + if conn and conn.sock and select([conn.sock], [], [], 0.0)[0]: + # Either data is buffered (bad), or the connection is dropped. + log.info("Resetting dropped connection: %s" % self.host) + conn.close() + + except Empty: + if self.block: + raise EmptyPoolError("Pool reached maximum size and no more " + "connections are allowed.") + pass # Oh well, we'll create a new connection then + + return conn or self._new_conn() + + def _put_conn(self, conn): + """ + Put a connection back into the pool. + + :param conn: + Connection object for the current host and port as returned by + :meth:`._new_conn` or :meth:`._get_conn`. + + If the pool is already full, the connection is discarded because we + exceeded maxsize. If connections are discarded frequently, then maxsize + should be increased. + """ + try: + self.pool.put(conn, block=False) + except Full: + # This should never happen if self.block == True + log.warning("HttpConnectionPool is full, discarding connection: %s" + % self.host) + + def _make_request(self, conn, method, url, timeout=_Default, + **httplib_request_kw): + """ + Perform a request on a given httplib connection object taken from our + pool. 
+ """ + self.num_requests += 1 + + if timeout is _Default: + timeout = self.timeout + + conn.timeout = timeout # This only does anything in Py26+ + + conn.request(method, url, **httplib_request_kw) + conn.sock.settimeout(timeout) + httplib_response = conn.getresponse() + + log.debug("\"%s %s %s\" %s %s" % + (method, url, + conn._http_vsn_str, # pylint: disable-msg=W0212 + httplib_response.status, httplib_response.length)) + + return httplib_response + + + def is_same_host(self, url): + """ + Check if the given ``url`` is a member of the same host as this + conncetion pool. + """ + # TODO: Add optional support for socket.gethostbyname checking. + return (url.startswith('/') or + get_host(url) == (self.scheme, self.host, self.port)) + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True, timeout=_Default, + pool_timeout=None, release_conn=None, **response_kw): + """ + Get a connection from the pool and perform an HTTP request. This is the + lowest level call for making a request, so you'll need to specify all + the raw details. + + .. note:: + + More commonly, it's appropriate to use a convenience method provided + by :class:`.RequestMethods`, such as :meth:`.request`. + + .. note:: + + `release_conn` will only behave as expected if + `preload_content=False` because we want to make + `preload_content=False` the default behaviour someday soon without + breaking backwards compatibility. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param body: + Data to send in the request body (useful for creating + POST requests, see HTTPConnectionPool.post_url for + more convenience). + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param retries: + Number of retries to allow before raising a MaxRetryError exception. + + :param redirect: + Automatically handle redirects (status codes 301, 302, 303, 307), + each redirect counts as a retry. + + :param assert_same_host: + If ``True``, will make sure that the host of the pool requests is + consistent else will raise HostChangedError. When False, you can + use the pool on an HTTP proxy and request foreign hosts. + + :param timeout: + If specified, overrides the default timeout for this one request. + + :param pool_timeout: + If set and the pool is set to block=True, then this method will + block for ``pool_timeout`` seconds and raise EmptyPoolError if no + connection is available within the time period. + + :param release_conn: + If False, then the urlopen call will not release the connection + back into the pool once a response is received (but will release if + you read the entire contents of the response such as when + `preload_content=True`). This is useful if you're not preloading + the response's content immediately. You will need to call + ``r.release_conn()`` on the response ``r`` to return the connection + back into the pool. If None, it takes the value of + ``response_kw.get('preload_content', True)``. 
+ + :param \**response_kw: + Additional parameters are passed to + :meth:`urllib3.response.HTTPResponse.from_httplib` + """ + if headers is None: + headers = self.headers + + if retries < 0: + raise MaxRetryError(url) + + if timeout is _Default: + timeout = self.timeout + + if release_conn is None: + release_conn = response_kw.get('preload_content', True) + + # Check host + if assert_same_host and not self.is_same_host(url): + host = "%s://%s" % (self.scheme, self.host) + if self.port: + host = "%s:%d" % (host, self.port) + + raise HostChangedError(host, url, retries - 1) + + conn = None + + try: + # Request a connection from the queue + # (Could raise SocketError: Bad file descriptor) + conn = self._get_conn(timeout=pool_timeout) + + # Make the request on the httplib connection object + httplib_response = self._make_request(conn, method, url, + timeout=timeout, + body=body, headers=headers) + + # If we're going to release the connection in ``finally:``, then + # the request doesn't need to know about the connection. Otherwise + # it will also try to release it and we'll have a double-release + # mess. + response_conn = not release_conn and conn + + # Import httplib's response into our own wrapper object + response = HTTPResponse.from_httplib(httplib_response, + pool=self, + connection=response_conn, + **response_kw) + + # else: + # The connection will be put back into the pool when + # ``response.release_conn()`` is called (implicitly by + # ``response.read()``) + + except (Empty), e: + # Timed out by queue + raise TimeoutError("Request timed out. (pool_timeout=%s)" % + pool_timeout) + + except (SocketTimeout), e: + # Timed out by socket + raise TimeoutError("Request timed out. (timeout=%s)" % + timeout) + + except (BaseSSLError), e: + # SSL certificate error + raise SSLError(e) + + except (CertificateError), e: + # Name mismatch + raise SSLError(e) + + except (HTTPException, SocketError), e: + # Connection broken, discard. It will be replaced next _get_conn(). + conn = None + + finally: + if conn and release_conn: + # Put the connection back to be reused + self._put_conn(conn) + + if not conn: + log.warn("Retrying (%d attempts remain) after connection " + "broken by '%r': %s" % (retries, e, url)) + return self.urlopen(method, url, body, headers, retries - 1, + redirect, assert_same_host) # Try again + + # Handle redirect? + redirect_location = redirect and response.get_redirect_location() + if redirect_location: + log.info("Redirecting %s -> %s" % (url, redirect_location)) + return self.urlopen(method, redirect_location, body, headers, + retries - 1, redirect, assert_same_host) + + return response + + +class HTTPSConnectionPool(HTTPConnectionPool): + """ + Same as :class:`.HTTPConnectionPool`, but HTTPS. + + When Python is compiled with the :mod:`ssl` module, then + :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, + instead of :class:httplib.HTTPSConnection`. + + The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters + are only used if :mod:`ssl` is available and are fed into + :meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket. 
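+
+    Example (host, certificate path, and request path are illustrative): ::
+
+        >>> pool = HTTPSConnectionPool('example.com',
+        ...                            cert_reqs='CERT_REQUIRED',
+        ...                            ca_certs='/etc/ssl/certs/ca.pem')
+        >>> r = pool.urlopen('GET', '/')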
+ """ + + scheme = 'https' + + def __init__(self, host, port=None, + strict=False, timeout=None, maxsize=1, + block=False, headers=None, + key_file=None, cert_file=None, + cert_reqs='CERT_NONE', ca_certs=None): + + super(HTTPSConnectionPool, self).__init__(host, port, + strict, timeout, maxsize, + block, headers) + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.ca_certs = ca_certs + + def _new_conn(self): + """ + Return a fresh :class:`httplib.HTTPSConnection`. + """ + self.num_connections += 1 + log.info("Starting new HTTPS connection (%d): %s" + % (self.num_connections, self.host)) + + if not ssl: + return HTTPSConnection(host=self.host, port=self.port) + + connection = VerifiedHTTPSConnection(host=self.host, port=self.port) + connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, + cert_reqs=self.cert_reqs, ca_certs=self.ca_certs) + return connection + + +## Helpers + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + Example: :: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = 'gzip,deflate' + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + basic_auth.encode('base64').strip() + + return headers + + +def get_host(url): + """ + Given a url, return its scheme, host and port (None if it's not there). + + For example: :: + + >>> get_host('http://google.com/mail/') + ('http', 'google.com', None) + >>> get_host('google.com:80') + ('http', 'google.com', 80) + """ + # This code is actually similar to urlparse.urlsplit, but much + # simplified for our needs. + port = None + scheme = 'http' + if '://' in url: + scheme, url = url.split('://', 1) + if '/' in url: + url, _path = url.split('/', 1) + if '@' in url: + _auth, url = url.split('@', 1) + if ':' in url: + url, port = url.split(':', 1) + port = int(port) + return scheme, url, port + + +def connection_from_url(url, **kw): + """ + Given a url, return an :class:`.ConnectionPool` instance of its host. + + This is a shortcut for not having to parse out the scheme, host, and port + of the url before creating an :class:`.ConnectionPool` instance. + + :param url: + Absolute URL string that must include the scheme. Port is optional. + + :param \**kw: + Passes additional parameters to the constructor of the appropriate + :class:`.ConnectionPool`. Useful for specifying things like + timeout, maxsize, headers, etc. 
+ + Example: :: + + >>> conn = connection_from_url('http://google.com/') + >>> r = conn.request('GET', '/') + """ + scheme, host, port = get_host(url) + if scheme == 'https': + return HTTPSConnectionPool(host, port=port, **kw) + else: + return HTTPConnectionPool(host, port=port, **kw) diff --git a/requests/packages/urllib3/contrib/__init__.py b/requests/packages/urllib3/contrib/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/requests/packages/urllib3/contrib/__init__.py diff --git a/requests/packages/urllib3/contrib/ntlmpool.py b/requests/packages/urllib3/contrib/ntlmpool.py new file mode 100644 index 0000000..c5f010e --- /dev/null +++ b/requests/packages/urllib3/contrib/ntlmpool.py @@ -0,0 +1,117 @@ +# urllib3/contrib/ntlmpool.py +# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +""" +NTLM authenticating pool, contributed by erikcederstran + +Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 +""" + +import httplib +from logging import getLogger +from ntlm import ntlm + +from urllib3 import HTTPSConnectionPool + + +log = getLogger(__name__) + + +class NTLMConnectionPool(HTTPSConnectionPool): + """ + Implements an NTLM authentication version of an urllib3 connection pool + """ + + scheme = 'https' + + def __init__(self, user, pw, authurl, *args, **kwargs): + """ + authurl is a random URL on the server that is protected by NTLM. + user is the Windows user, probably in the DOMAIN\username format. + pw is the password for the user. + """ + super(NTLMConnectionPool, self).__init__(*args, **kwargs) + self.authurl = authurl + self.rawuser = user + user_parts = user.split('\\', 1) + self.domain = user_parts[0].upper() + self.user = user_parts[1] + self.pw = pw + + def _new_conn(self): + # Performs the NTLM handshake that secures the connection. The socket + # must be kept open while requests are performed. + self.num_connections += 1 + log.debug('Starting NTLM HTTPS connection no. 
%d: https://%s%s' % + (self.num_connections, self.host, self.authurl)) + + headers = {} + headers['Connection'] = 'Keep-Alive' + req_header = 'Authorization' + resp_header = 'www-authenticate' + + conn = httplib.HTTPSConnection(host=self.host, port=self.port) + + # Send negotiation message + headers[req_header] = ( + 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + reshdr = dict(res.getheaders()) + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % reshdr) + log.debug('Response data: %s [...]' % res.read(100)) + + # Remove the reference to the socket, so that it can not be closed by + # the response object (we want to keep the socket open) + res.fp = None + + # Server should respond with a challenge message + auth_header_values = reshdr[resp_header].split(', ') + auth_header_value = None + for s in auth_header_values: + if s[:5] == 'NTLM ': + auth_header_value = s[5:] + if auth_header_value is None: + raise Exception('Unexpected %s response header: %s' % + (resp_header, reshdr[resp_header])) + + # Send authentication message + ServerChallenge, NegotiateFlags = \ + ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) + auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, + self.user, + self.domain, + self.pw, + NegotiateFlags) + headers[req_header] = 'NTLM %s' % auth_msg + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % dict(res.getheaders())) + log.debug('Response data: %s [...]' % res.read()[:100]) + if res.status != 200: + if res.status == 401: + raise Exception('Server rejected request: wrong ' + 'username or password') + raise Exception('Wrong server response: %s %s' % + (res.status, res.reason)) + + res.fp = None + log.debug('Connection established') + return conn + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True): + if headers is None: + headers = {} + headers['Connection'] = 'Keep-Alive' + return super(NTLMConnectionPool, self).urlopen(method, url, body, + headers, retries, + redirect, + assert_same_host) diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py new file mode 100644 index 0000000..47937f7 --- /dev/null +++ b/requests/packages/urllib3/exceptions.py @@ -0,0 +1,45 @@ +# urllib3/exceptions.py +# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +## Exceptions + +class HTTPError(Exception): + "Base exception used by this module." + pass + + +class SSLError(Exception): + "Raised when SSL certificate fails in an HTTPS connection." + pass + + +class MaxRetryError(HTTPError): + "Raised when the maximum number of retries is exceeded." + def __init__(self, url): + HTTPError.__init__(self, "Max retries exceeded for url: %s" % url) + self.url = url + + +class TimeoutError(HTTPError): + "Raised when a socket timeout occurs." + pass + + +class HostChangedError(HTTPError): + "Raised when an existing pool gets a request for a foreign host." 
+ def __init__(self, original_host, new_url, retries=3): + HTTPError.__init__(self, + "Connection pool with host '%s' tried to open a foreign host: %s" % + (original_host, new_url)) + + self.original_host = original_host + self.new_url = new_url + self.retries = retries + + +class EmptyPoolError(HTTPError): + "Raised when a pool runs out of connections and no more are allowed." + pass diff --git a/requests/packages/urllib3/filepost.py b/requests/packages/urllib3/filepost.py new file mode 100644 index 0000000..2ffea8b --- /dev/null +++ b/requests/packages/urllib3/filepost.py @@ -0,0 +1,71 @@ +# urllib3/filepost.py +# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import codecs +import mimetools +import mimetypes + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO # pylint: disable-msg=W0404 + + +writer = codecs.lookup('utf-8')[3] + + +def get_content_type(filename): + return mimetypes.guess_type(filename)[0] or 'application/octet-stream' + + +def encode_multipart_formdata(fields, boundary=None): + """ + Encode a dictionary of ``fields`` using the multipart/form-data mime format. + + :param fields: + Dictionary of fields. The key is treated as the field name, and the + value as the body of the form-data. If the value is a tuple of two + elements, then the first element is treated as the filename of the + form-data section. + + :param boundary: + If not specified, then a random boundary will be generated using + :func:`mimetools.choose_boundary`. + """ + body = StringIO() + if boundary is None: + boundary = mimetools.choose_boundary() + + for fieldname, value in fields.iteritems(): + body.write('--%s\r\n' % (boundary)) + + if isinstance(value, tuple): + filename, data = value + writer(body).write('Content-Disposition: form-data; name="%s"; ' + 'filename="%s"\r\n' % (fieldname, filename)) + body.write('Content-Type: %s\r\n\r\n' % + (get_content_type(filename))) + else: + data = value + writer(body).write('Content-Disposition: form-data; name="%s"\r\n' + % (fieldname)) + body.write('Content-Type: text/plain\r\n\r\n') + + if isinstance(data, int): + data = str(data) # Backwards compatibility + + if isinstance(data, unicode): + writer(body).write(data) + else: + body.write(data) + + body.write('\r\n') + + body.write('--%s--\r\n' % (boundary)) + + content_type = 'multipart/form-data; boundary=%s' % boundary + + return body.getvalue(), content_type diff --git a/requests/packages/urllib3/packages/__init__.py b/requests/packages/urllib3/packages/__init__.py new file mode 100644 index 0000000..37e8351 --- /dev/null +++ b/requests/packages/urllib3/packages/__init__.py @@ -0,0 +1,4 @@ +from __future__ import absolute_import + +from . import ssl_match_hostname + diff --git a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py new file mode 100644 index 0000000..9560b04 --- /dev/null +++ b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py @@ -0,0 +1,61 @@ +"""The match_hostname() function from Python 3.2, essential when using SSL.""" + +import re + +__version__ = '3.2.2' + +class CertificateError(ValueError): + pass + +def _dnsname_to_pat(dn): + pats = [] + for frag in dn.split(r'.'): + if frag == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. 
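+            # e.g. for '*.example.com' this yields the pattern
+            # [^.]+\.example\.com, which matches 'www.example.com' but not
+            # 'a.b.example.com' or 'example.com'.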
+            pats.append('[^.]+')
+        else:
+            # Otherwise, '*' matches any dotless fragment.
+            frag = re.escape(frag)
+            pats.append(frag.replace(r'\*', '[^.]*'))
+    return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
+
+def match_hostname(cert, hostname):
+    """Verify that *cert* (in decoded format as returned by
+    SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules
+    are mostly followed, but IP addresses are not accepted for *hostname*.
+
+    CertificateError is raised on failure. On success, the function
+    returns nothing.
+    """
+    if not cert:
+        raise ValueError("empty or no certificate")
+    dnsnames = []
+    san = cert.get('subjectAltName', ())
+    for key, value in san:
+        if key == 'DNS':
+            if _dnsname_to_pat(value).match(hostname):
+                return
+            dnsnames.append(value)
+    if not dnsnames:
+        # The subject is only checked when there is no dNSName entry
+        # in subjectAltName
+        for sub in cert.get('subject', ()):
+            for key, value in sub:
+                # XXX according to RFC 2818, the most specific Common Name
+                # must be used.
+                if key == 'commonName':
+                    if _dnsname_to_pat(value).match(hostname):
+                        return
+                    dnsnames.append(value)
+    if len(dnsnames) > 1:
+        raise CertificateError("hostname %r "
+            "doesn't match either of %s"
+            % (hostname, ', '.join(map(repr, dnsnames))))
+    elif len(dnsnames) == 1:
+        raise CertificateError("hostname %r "
+            "doesn't match %r"
+            % (hostname, dnsnames[0]))
+    else:
+        raise CertificateError("no appropriate commonName or "
+            "subjectAltName fields were found")
diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py
new file mode 100644
index 0000000..482ee4a
--- /dev/null
+++ b/requests/packages/urllib3/poolmanager.py
@@ -0,0 +1,133 @@
+# urllib3/poolmanager.py
+# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+import logging
+
+from ._collections import RecentlyUsedContainer
+from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
+from .connectionpool import get_host, connection_from_url
+from .exceptions import HostChangedError
+from .request import RequestMethods
+
+
+__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
+
+
+pool_classes_by_scheme = {
+    'http': HTTPConnectionPool,
+    'https': HTTPSConnectionPool,
+}
+
+port_by_scheme = {
+    'http': 80,
+    'https': 443,
+}
+
+log = logging.getLogger(__name__)
+
+
+class PoolManager(RequestMethods):
+    """
+    Allows for arbitrary requests while transparently keeping track of
+    necessary connection pools for you.
+
+    :param num_pools:
+        Number of connection pools to cache before discarding the least
+        recently used pool.
+
+    :param \**connection_pool_kw:
+        Additional parameters are used to create fresh
+        :class:`urllib3.connectionpool.ConnectionPool` instances.
+
+    Example: ::
+
+        >>> manager = PoolManager()
+        >>> r = manager.urlopen("http://google.com/")
+        >>> r = manager.urlopen("http://google.com/mail")
+        >>> r = manager.urlopen("http://yahoo.com/")
+        >>> len(manager.pools)
+        2
+
+    """
+
+    # TODO: Make sure there are no memory leaks here.
+
+    def __init__(self, num_pools=10, **connection_pool_kw):
+        self.connection_pool_kw = connection_pool_kw
+        self.pools = RecentlyUsedContainer(num_pools)
+
+    def connection_from_host(self, host, port=80, scheme='http'):
+        """
+        Get a :class:`ConnectionPool` based on the host, port, and scheme.
+
+        Note that an appropriate ``port`` value is required here to normalize
+        connection pools in our container most effectively.
+        """
+        pool_key = (scheme, host, port)
+
+        # If the scheme, host, or port doesn't match existing open connections,
+        # open a new ConnectionPool.
+        pool = self.pools.get(pool_key)
+        if pool:
+            return pool
+
+        # Make a fresh ConnectionPool of the desired type
+        pool_cls = pool_classes_by_scheme[scheme]
+        pool = pool_cls(host, port, **self.connection_pool_kw)
+
+        self.pools[pool_key] = pool
+
+        return pool
+
+    def connection_from_url(self, url):
+        """
+        Similar to :func:`urllib3.connectionpool.connection_from_url` but
+        doesn't pass any additional parameters to the
+        :class:`urllib3.connectionpool.ConnectionPool` constructor.
+
+        Additional parameters are taken from the :class:`.PoolManager`
+        constructor.
+        """
+        scheme, host, port = get_host(url)
+
+        port = port or port_by_scheme.get(scheme, 80)
+
+        return self.connection_from_host(host, port=port, scheme=scheme)
+
+    def urlopen(self, method, url, **kw):
+        """
+        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`.
+
+        ``url`` must be absolute, such that an appropriate
+        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
+        """
+        conn = self.connection_from_url(url)
+        try:
+            return conn.urlopen(method, url, **kw)
+
+        except HostChangedError, e:
+            kw['retries'] = e.retries # Persist retries countdown
+            return self.urlopen(method, e.new_url, **kw)
+
+
+class ProxyManager(RequestMethods):
+    """
+    Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method
+    will make requests to any url through the defined proxy.
+    """
+
+    def __init__(self, proxy_pool):
+        self.proxy_pool = proxy_pool
+
+    def urlopen(self, method, url, **kw):
+        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
+        kw['assert_same_host'] = False
+        return self.proxy_pool.urlopen(method, url, **kw)
+
+
+def proxy_from_url(url, **pool_kw):
+    proxy_pool = connection_from_url(url, **pool_kw)
+    return ProxyManager(proxy_pool)
diff --git a/requests/packages/urllib3/request.py b/requests/packages/urllib3/request.py
new file mode 100644
index 0000000..a7e0b5d
--- /dev/null
+++ b/requests/packages/urllib3/request.py
@@ -0,0 +1,145 @@
+# urllib3/request.py
+# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+
+from urllib import urlencode
+
+from .filepost import encode_multipart_formdata
+
+
+__all__ = ['RequestMethods']
+
+
+class RequestMethods(object):
+    """
+    Convenience mixin for classes that implement a :meth:`urlopen` method, such
+    as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
+    :class:`~urllib3.poolmanager.PoolManager`.
+
+    Provides behavior for making common types of HTTP request methods and
+    decides which type of request field encoding to use.
+
+    Specifically,
+
+    :meth:`.request_encode_url` is for sending requests whose fields are encoded
+    in the URL (such as GET, HEAD, DELETE).
+
+    :meth:`.request_encode_body` is for sending requests whose fields are
+    encoded in the *body* of the request using multipart or www-form-urlencoded
+    (such as for POST, PUT, PATCH).
+
+    :meth:`.request` is for making any kind of request; it will look up the
+    appropriate encoding format and use one of the above two methods to make
+    the request.
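+
+    A usage sketch (hypothetical host; any :class:`RequestMethods` subclass
+    such as a connection pool works the same way)::
+
+        >>> pool = HTTPConnectionPool('example.com')
+        >>> r = pool.request('GET', '/', fields={'q': 'urllib3'})   # query string
+        >>> r = pool.request('POST', '/', fields={'q': 'urllib3'})  # request body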
+    """
+
+    _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])
+
+    _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE'])
+
+    def urlopen(self, method, url, body=None, headers=None,
+                encode_multipart=True, multipart_boundary=None,
+                **kw):
+        raise NotImplementedError("Classes extending RequestMethods must "
+                                  "implement their own ``urlopen`` method.")
+
+    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
+        """
+        Make a request using :meth:`urlopen` with the appropriate encoding of
+        ``fields`` based on the ``method`` used.
+
+        This is a convenience method that requires the least amount of manual
+        effort. It can be used in most situations, while still having the option
+        to drop down to more specific methods when necessary, such as
+        :meth:`request_encode_url`, :meth:`request_encode_body`,
+        or even the lowest level :meth:`urlopen`.
+        """
+        method = method.upper()
+
+        if method in self._encode_url_methods:
+            return self.request_encode_url(method, url, fields=fields,
+                                           headers=headers,
+                                           **urlopen_kw)
+        else:
+            return self.request_encode_body(method, url, fields=fields,
+                                            headers=headers,
+                                            **urlopen_kw)
+
+    def request_encode_url(self, method, url, fields=None, **urlopen_kw):
+        """
+        Make a request using :meth:`urlopen` with the ``fields`` encoded in
+        the url. This is useful for request methods like GET, HEAD, DELETE, etc.
+        """
+        if fields:
+            url += '?' + urlencode(fields)
+        return self.urlopen(method, url, **urlopen_kw)
+
+    def request_encode_body(self, method, url, fields=None, headers=None,
+                            encode_multipart=True, multipart_boundary=None,
+                            **urlopen_kw):
+        """
+        Make a request using :meth:`urlopen` with the ``fields`` encoded in
+        the body. This is useful for request methods like POST, PUT, PATCH, etc.
+
+        When ``encode_multipart=True`` (default), then
+        :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the
+        payload with the appropriate content type. Otherwise
+        :meth:`urllib.urlencode` is used with the
+        'application/x-www-form-urlencoded' content type.
+
+        Multipart encoding must be used when posting files, and it's reasonably
+        safe to use it at other times too. However, it may break request
+        signing, such as with OAuth.
+
+        Supports an optional ``fields`` parameter of key/value strings AND
+        key/filetuple. A filetuple is a (filename, data) tuple. For example: ::
+
+            fields = {
+                'foo': 'bar',
+                'fakefile': ('foofile.txt', 'contents of foofile'),
+                'realfile': ('barfile.txt', open('realfile').read()),
+                'nonamefile': ('contents of nonamefile field'),
+            }
+
+        When uploading a file, providing a filename (the first parameter of the
+        tuple) is optional but recommended to best mimic the behavior of
+        browsers.
+
+        Note that if ``headers`` are supplied, the 'Content-Type' header will be
+        overwritten because it depends on the dynamic random boundary string
+        which is used to compose the body of the request. The random boundary
+        string can be explicitly set with the ``multipart_boundary`` parameter.
+        """
+        if encode_multipart:
+            body, content_type = encode_multipart_formdata(fields or {},
+                                                           boundary=multipart_boundary)
+        else:
+            body, content_type = (urlencode(fields or {}),
+                                  'application/x-www-form-urlencoded')
+
+        headers = headers or {}
+        headers.update({'Content-Type': content_type})
+
+        return self.urlopen(method, url, body=body, headers=headers,
+                            **urlopen_kw)
+
+    # Deprecated:
+
+    def get_url(self, url, fields=None, **urlopen_kw):
+        """
+        .. deprecated:: 1.0
+            Use :meth:`request` instead.
+        """
+        return self.request_encode_url('GET', url, fields=fields,
+                                       **urlopen_kw)
+
+    def post_url(self, url, fields=None, headers=None, **urlopen_kw):
+        """
+        .. deprecated:: 1.0
+            Use :meth:`request` instead.
+        """
+        return self.request_encode_body('POST', url, fields=fields,
+                                        headers=headers,
+                                        **urlopen_kw)
diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py
new file mode 100644
index 0000000..ee2ff66
--- /dev/null
+++ b/requests/packages/urllib3/response.py
@@ -0,0 +1,195 @@
+# urllib3/response.py
+# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+import gzip
+import logging
+import zlib
+
+
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO # pylint: disable-msg=W0404
+
+
+from .exceptions import HTTPError
+
+
+log = logging.getLogger(__name__)
+
+
+def decode_gzip(data):
+    gzipper = gzip.GzipFile(fileobj=StringIO(data))
+    return gzipper.read()
+
+
+def decode_deflate(data):
+    try:
+        return zlib.decompress(data)
+    except zlib.error:
+        return zlib.decompress(data, -zlib.MAX_WBITS)
+
+
+class HTTPResponse(object):
+    """
+    HTTP Response container.
+
+    Backwards-compatible with httplib's HTTPResponse, but the response ``body``
+    is loaded and decoded on-demand when the ``data`` property is accessed.
+
+    Extra parameters for behaviour not present in httplib.HTTPResponse:
+
+    :param preload_content:
+        If True, the response's body will be preloaded during construction.
+
+    :param decode_content:
+        If True, attempts to decode specific content-encodings based on headers
+        (like 'gzip' and 'deflate'); if False, decoding is skipped and the raw
+        data is used instead.
+
+    :param original_response:
+        When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
+        object, it's convenient to include the original for debug purposes. It's
+        otherwise unused.
+    """
+
+    CONTENT_DECODERS = {
+        'gzip': decode_gzip,
+        'deflate': decode_deflate,
+    }
+
+    def __init__(self, body='', headers=None, status=0, version=0, reason=None,
+                 strict=0, preload_content=True, decode_content=True,
+                 original_response=None, pool=None, connection=None):
+        self.headers = headers or {}
+        self.status = status
+        self.version = version
+        self.reason = reason
+        self.strict = strict
+
+        self._decode_content = decode_content
+        self._body = None
+        self._fp = None
+        self._original_response = original_response
+
+        self._pool = pool
+        self._connection = connection
+
+        if hasattr(body, 'read'):
+            self._fp = body
+
+        if preload_content:
+            self._body = self.read(decode_content=decode_content)
+
+    def get_redirect_location(self):
+        """
+        Should we redirect and where to?
+
+        :returns: Truthy redirect location string if we got a redirect status
+            code and valid location. ``None`` if redirect status and no
+            location. ``False`` if not a redirect status code.
+        """
+        if self.status in [301, 302, 303, 307]:
+            return self.headers.get('location')
+
+        return False
+
+    def release_conn(self):
+        if not self._pool or not self._connection:
+            return
+
+        self._pool._put_conn(self._connection)
+        self._connection = None
+
+    @property
+    def data(self):
+        # For backwards-compat with urllib3 0.4 and earlier.
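+        # The body is read lazily: the first access reads and caches the whole
+        # stream (via read(cache_content=True) below), so later accesses return
+        # the cached value even once the underlying file object is exhausted.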
+        if self._body:
+            return self._body
+
+        if self._fp:
+            return self.read(cache_content=True)
+
+    def read(self, amt=None, decode_content=None, cache_content=False):
+        """
+        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
+        parameters: ``decode_content`` and ``cache_content``.
+
+        :param amt:
+            How much of the content to read. If specified, decoding and caching
+            is skipped because we can't decode partial content nor does it make
+            sense to cache partial content as the full response.
+
+        :param decode_content:
+            If True, will attempt to decode the body based on the
+            'content-encoding' header. (Overridden if ``amt`` is set.)
+
+        :param cache_content:
+            If True, will save the returned data such that the same result is
+            returned regardless of the state of the underlying file object. This
+            is useful if you want the ``.data`` property to continue working
+            after having ``.read()`` the file object. (Overridden if ``amt`` is
+            set.)
+        """
+        content_encoding = self.headers.get('content-encoding')
+        decoder = self.CONTENT_DECODERS.get(content_encoding)
+        if decode_content is None:
+            decode_content = self._decode_content
+
+        data = self._fp and self._fp.read(amt)
+
+        try:
+
+            if amt:
+                return data
+
+            if not decode_content or not decoder:
+                if cache_content:
+                    self._body = data
+
+                return data
+
+            try:
+                data = decoder(data)
+            except IOError:
+                raise HTTPError("Received response with content-encoding: %s, but "
+                                "failed to decode it." % content_encoding)
+
+            if cache_content:
+                self._body = data
+
+            return data
+
+        finally:
+
+            if self._original_response and self._original_response.isclosed():
+                self.release_conn()
+
+    @classmethod
+    def from_httplib(ResponseCls, r, **response_kw):
+        """
+        Given an :class:`httplib.HTTPResponse` instance ``r``, return a
+        corresponding :class:`urllib3.response.HTTPResponse` object.
+
+        Remaining parameters are passed to the HTTPResponse constructor, along
+        with ``original_response=r``.
+        """
+
+        return ResponseCls(body=r,
+                           headers=dict(r.getheaders()),
+                           status=r.status,
+                           version=r.version,
+                           reason=r.reason,
+                           strict=r.strict,
+                           original_response=r,
+                           **response_kw)
+
+    # Backwards-compatibility methods for httplib.HTTPResponse
+    def getheaders(self):
+        return self.headers
+
+    def getheader(self, name, default=None):
+        return self.headers.get(name, default)
diff --git a/requests/sessions.py b/requests/sessions.py
new file mode 100644
index 0000000..af9f9c7
--- /dev/null
+++ b/requests/sessions.py
@@ -0,0 +1,288 @@
+# -*- coding: utf-8 -*-
+
+"""
+requests.session
+~~~~~~~~~~~~~~~~
+
+This module provides a Session object to manage and persist settings across
+requests (cookies, auth, proxies).
+
+"""
+
+from .defaults import defaults
+from .models import Request
+from .hooks import dispatch_hook
+from .utils import header_expand
+from .packages.urllib3.poolmanager import PoolManager
+
+
+def merge_kwargs(local_kwarg, default_kwarg):
+    """Merges kwarg dictionaries.
+
+    If a local key in the dictionary is set to None, it will be removed.
+    """
+
+    if default_kwarg is None:
+        return local_kwarg
+
+    if isinstance(local_kwarg, basestring):
+        return local_kwarg
+
+    if local_kwarg is None:
+        return default_kwarg
+
+    # Bypass if not a dictionary (e.g. timeout)
+    if not hasattr(default_kwarg, 'items'):
+        return local_kwarg
+
+    # Update new values.
+    kwargs = default_kwarg.copy()
+    kwargs.update(local_kwarg)
+
+    # Remove keys that are set to None.
+    for (k, v) in local_kwarg.items():
+        if v is None:
+            del kwargs[k]
+
+    return kwargs
+
+
+class Session(object):
+    """A Requests session."""
+
+    __attrs__ = [
+        'headers', 'cookies', 'auth', 'timeout', 'proxies', 'hooks',
+        'params', 'config']
+
+
+    def __init__(self,
+        headers=None,
+        cookies=None,
+        auth=None,
+        timeout=None,
+        proxies=None,
+        hooks=None,
+        params=None,
+        config=None,
+        verify=True):
+
+        self.headers = headers or {}
+        self.cookies = cookies or {}
+        self.auth = auth
+        self.timeout = timeout
+        self.proxies = proxies or {}
+        self.hooks = hooks or {}
+        self.params = params or {}
+        self.config = config or {}
+        self.verify = verify
+
+        for (k, v) in defaults.items():
+            self.config.setdefault(k, v)
+
+        self.poolmanager = PoolManager(
+            num_pools=self.config.get('pool_connections'),
+            maxsize=self.config.get('pool_maxsize')
+        )
+
+        # Set up a CookieJar to be used by default
+        self.cookies = {}
+
+        # Add passed cookies in.
+        if cookies is not None:
+            self.cookies.update(cookies)
+
+    def __repr__(self):
+        return '<requests-client at 0x%x>' % (id(self))
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        pass
+
+    def request(self, method, url,
+        params=None,
+        data=None,
+        headers=None,
+        cookies=None,
+        files=None,
+        auth=None,
+        timeout=None,
+        allow_redirects=False,
+        proxies=None,
+        hooks=None,
+        return_response=True,
+        config=None,
+        prefetch=False,
+        verify=None):
+
+        """Constructs and sends a :class:`Request <Request>`.
+        Returns :class:`Response <Response>` object.
+
+        :param method: method for the new :class:`Request` object.
+        :param url: URL for the new :class:`Request` object.
+        :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
+        :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`.
+        :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
+        :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
+        :param files: (optional) Dictionary of 'filename': file-like-objects for multipart encoding upload.
+        :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
+        :param timeout: (optional) Float describing the timeout of the request.
+        :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
+        :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
+        :param return_response: (optional) If False, an un-sent Request object will be returned.
+        :param config: (optional) A configuration dictionary.
+        :param prefetch: (optional) if ``True``, the response content will be immediately downloaded.
+        :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided.
+        """
+
+        method = str(method).upper()
+
+        # Default empty dicts for dict params.
+        cookies = {} if cookies is None else cookies
+        data = {} if data is None else data
+        files = {} if files is None else files
+        headers = {} if headers is None else headers
+        params = {} if params is None else params
+        hooks = {} if hooks is None else hooks
+
+        if verify is None:
+            verify = self.verify
+
+        # use session's hooks as defaults
+        for key, cb in self.hooks.iteritems():
+            hooks.setdefault(key, cb)
+
+        # Expand header values.
+        if headers:
+            for k, v in headers.items() or {}:
+                headers[k] = header_expand(v)
+
+        args = dict(
+            method=method,
+            url=url,
+            data=data,
+            params=params,
+            headers=headers,
+            cookies=cookies,
+            files=files,
+            auth=auth,
+            hooks=hooks,
+            timeout=timeout,
+            allow_redirects=allow_redirects,
+            proxies=proxies,
+            config=config,
+            verify=verify,
+            _poolmanager=self.poolmanager
+        )
+
+        # Merge local kwargs with session kwargs.
+        for attr in self.__attrs__:
+            session_val = getattr(self, attr, None)
+            local_val = args.get(attr)
+
+            args[attr] = merge_kwargs(local_val, session_val)
+
+        # Arguments manipulation hook.
+        args = dispatch_hook('args', args['hooks'], args)
+
+        # Create the (empty) response.
+        r = Request(**args)
+
+        # Give the response some context.
+        r.session = self
+
+        # Don't send if asked nicely.
+        if not return_response:
+            return r
+
+        # Send the HTTP Request.
+        r.send(prefetch=prefetch)
+
+        # Send any cookies back up to the session.
+        self.cookies.update(r.response.cookies)
+
+        # Return the response.
+        return r.response
+
+
+    def get(self, url, **kwargs):
+        """Sends a GET request. Returns :class:`Response` object.
+
+        :param url: URL for the new :class:`Request` object.
+        :param **kwargs: Optional arguments that ``request`` takes.
+        """
+
+        kwargs.setdefault('allow_redirects', True)
+        return self.request('get', url, **kwargs)
+
+
+    def options(self, url, **kwargs):
+        """Sends an OPTIONS request. Returns :class:`Response` object.
+
+        :param url: URL for the new :class:`Request` object.
+        :param **kwargs: Optional arguments that ``request`` takes.
+        """
+
+        kwargs.setdefault('allow_redirects', True)
+        return self.request('options', url, **kwargs)
+
+
+    def head(self, url, **kwargs):
+        """Sends a HEAD request. Returns :class:`Response` object.
+
+        :param url: URL for the new :class:`Request` object.
+        :param **kwargs: Optional arguments that ``request`` takes.
+        """
+
+        kwargs.setdefault('allow_redirects', True)
+        return self.request('head', url, **kwargs)
+
+
+    def post(self, url, data=None, **kwargs):
+        """Sends a POST request. Returns :class:`Response` object.
+
+        :param url: URL for the new :class:`Request` object.
+        :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`.
+        :param **kwargs: Optional arguments that ``request`` takes.
+        """
+
+        return self.request('post', url, data=data, **kwargs)
+
+
+    def put(self, url, data=None, **kwargs):
+        """Sends a PUT request. Returns :class:`Response` object.
+
+        :param url: URL for the new :class:`Request` object.
+        :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`.
+        :param **kwargs: Optional arguments that ``request`` takes.
+        """
+
+        return self.request('put', url, data=data, **kwargs)
+
+
+    def patch(self, url, data=None, **kwargs):
+        """Sends a PATCH request. Returns :class:`Response` object.
+
+        :param url: URL for the new :class:`Request` object.
+        :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`.
+        :param **kwargs: Optional arguments that ``request`` takes.
+        """
+
+        return self.request('patch', url, data=data, **kwargs)
+
+
+    def delete(self, url, **kwargs):
+        """Sends a DELETE request. Returns :class:`Response` object.
+
+        :param url: URL for the new :class:`Request` object.
+        :param **kwargs: Optional arguments that ``request`` takes.
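+
+        Usage sketch (hypothetical URL)::
+
+            >>> s = Session()
+            >>> r = s.delete('http://example.com/resource/1')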
+ """ + + return self.request('delete', url, **kwargs) + + +def session(**kwargs): + """Returns a :class:`Session` for context-management.""" + + return Session(**kwargs) diff --git a/requests/status_codes.py b/requests/status_codes.py new file mode 100644 index 0000000..fab8e95 --- /dev/null +++ b/requests/status_codes.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +from .structures import LookupDict + +_codes = { + + # Informational. + 100: ('continue',), + 101: ('switching_protocols',), + 102: ('processing',), + 103: ('checkpoint',), + 122: ('uri_too_long', 'request_uri_too_long'), + 200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/'), + 201: ('created',), + 202: ('accepted',), + 203: ('non_authoritative_info', 'non_authoritative_information'), + 204: ('no_content',), + 205: ('reset_content', 'reset'), + 206: ('partial_content', 'partial'), + 207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'), + 208: ('im_used',), + + # Redirection. + 300: ('multiple_choices',), + 301: ('moved_permanently', 'moved', '\\o-'), + 302: ('found',), + 303: ('see_other', 'other'), + 304: ('not_modified',), + 305: ('use_proxy',), + 306: ('switch_proxy',), + 307: ('temporary_redirect', 'temporary_moved', 'temporary'), + 308: ('resume_incomplete', 'resume'), + + # Client Error. + 400: ('bad_request', 'bad'), + 401: ('unauthorized',), + 402: ('payment_required', 'payment'), + 403: ('forbidden',), + 404: ('not_found', '-o-'), + 405: ('method_not_allowed', 'not_allowed'), + 406: ('not_acceptable',), + 407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'), + 408: ('request_timeout', 'timeout'), + 409: ('conflict',), + 410: ('gone',), + 411: ('length_required',), + 412: ('precondition_failed', 'precondition'), + 413: ('request_entity_too_large',), + 414: ('request_uri_too_large',), + 415: ('unsupported_media_type', 'unsupported_media', 'media_type'), + 416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'), + 417: ('expectation_failed',), + 418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'), + 422: ('unprocessable_entity', 'unprocessable'), + 423: ('locked',), + 424: ('failed_dependency', 'dependency'), + 425: ('unordered_collection', 'unordered'), + 426: ('upgrade_required', 'upgrade'), + 428: ('precondition_required', 'precondition'), + 429: ('too_many_requests', 'too_many'), + 431: ('header_fields_too_large', 'fields_too_large'), + 444: ('no_response', 'none'), + 449: ('retry_with', 'retry'), + 450: ('blocked_by_windows_parental_controls', 'parental_controls'), + 499: ('client_closed_request',), + + # Server Error. + 500: ('internal_server_error', 'server_error', '/o\\'), + 501: ('not_implemented',), + 502: ('bad_gateway',), + 503: ('service_unavailable', 'unavailable'), + 504: ('gateway_timeout',), + 505: ('http_version_not_supported', 'http_version'), + 506: ('variant_also_negotiates',), + 507: ('insufficient_storage',), + 509: ('bandwidth_limit_exceeded', 'bandwidth'), + 510: ('not_extended',), +} + +codes = LookupDict(name='status_codes') + +for (code, titles) in _codes.items(): + for title in titles: + setattr(codes, title, code) + if not title.startswith('\\'): + setattr(codes, title.upper(), code)
\ No newline at end of file diff --git a/requests/structures.py b/requests/structures.py new file mode 100644 index 0000000..35a903f --- /dev/null +++ b/requests/structures.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +""" +requests.structures +~~~~~~~~~~~~~~~~~~~ + +Data structures that power Requests. + +""" + + +class CaseInsensitiveDict(dict): + """Case-insensitive Dictionary + + For example, ``headers['content-encoding']`` will return the + value of a ``'Content-Encoding'`` response header.""" + + @property + def lower_keys(self): + if not hasattr(self, '_lower_keys') or not self._lower_keys: + self._lower_keys = dict((k.lower(), k) for k in self.iterkeys()) + return self._lower_keys + + def _clear_lower_keys(self): + if hasattr(self, '_lower_keys'): + self._lower_keys.clear() + + def __setitem__(self, key, value): + dict.__setitem__(self, key, value) + self._clear_lower_keys() + + def __delitem__(self, key): + dict.__delitem__(self, key) + self._lower_keys.clear() + + def __contains__(self, key): + return key.lower() in self.lower_keys + + def __getitem__(self, key): + # We allow fall-through here, so values default to None + if key in self: + return dict.__getitem__(self, self.lower_keys[key.lower()]) + + def get(self, key, default=None): + if key in self: + return self[key] + else: + return default + +class LookupDict(dict): + """Dictionary lookup object.""" + + def __init__(self, name=None): + self.name = name + super(LookupDict, self).__init__() + + def __repr__(self): + return '<lookup \'%s\'>' % (self.name) + + def __getitem__(self, key): + # We allow fall-through here, so values default to None + + return self.__dict__.get(key, None) + + def get(self, key, default=None): + return self.__dict__.get(key, default)
\ No newline at end of file diff --git a/requests/utils.py b/requests/utils.py new file mode 100644 index 0000000..95dea4b --- /dev/null +++ b/requests/utils.py @@ -0,0 +1,401 @@ +# -*- coding: utf-8 -*- + +""" +requests.utils +~~~~~~~~~~~~~~ + +This module provides utility functions that are used within Requests +that are also useful for external consumption. + +""" + +import cgi +import codecs +import cookielib +import os +import random +import re +import zlib +import urllib + +from urllib2 import parse_http_list as _parse_list_header + + +def guess_filename(obj): + """Tries to guess the filename of the given object.""" + name = getattr(obj, 'name', None) + if name and name[0] != '<' and name[-1] != '>': + return name + +# From mitsuhiko/werkzeug (used with permission). +def parse_list_header(value): + """Parse lists as described by RFC 2068 Section 2. + + In particular, parse comma-separated lists where the elements of + the list may include quoted-strings. A quoted-string could + contain a comma. A non-quoted string could have quotes in the + middle. Quotes are removed automatically after parsing. + + It basically works like :func:`parse_set_header` just that items + may appear multiple times and case sensitivity is preserved. + + The return value is a standard :class:`list`: + + >>> parse_list_header('token, "quoted value"') + ['token', 'quoted value'] + + To create a header from the :class:`list` again, use the + :func:`dump_header` function. + + :param value: a string with a list header. + :return: :class:`list` + """ + result = [] + for item in _parse_list_header(value): + if item[:1] == item[-1:] == '"': + item = unquote_header_value(item[1:-1]) + result.append(item) + return result + + +# From mitsuhiko/werkzeug (used with permission). +def parse_dict_header(value): + """Parse lists of key, value pairs as described by RFC 2068 Section 2 and + convert them into a python dict: + + >>> d = parse_dict_header('foo="is a fish", bar="as well"') + >>> type(d) is dict + True + >>> sorted(d.items()) + [('bar', 'as well'), ('foo', 'is a fish')] + + If there is no value for a key it will be `None`: + + >>> parse_dict_header('key_without_value') + {'key_without_value': None} + + To create a header from the :class:`dict` again, use the + :func:`dump_header` function. + + :param value: a string with a dict header. + :return: :class:`dict` + """ + result = {} + for item in _parse_list_header(value): + if '=' not in item: + result[item] = None + continue + name, value = item.split('=', 1) + if value[:1] == value[-1:] == '"': + value = unquote_header_value(value[1:-1]) + result[name] = value + return result + + +# From mitsuhiko/werkzeug (used with permission). +def unquote_header_value(value, is_filename=False): + r"""Unquotes a header value. (Reversal of :func:`quote_header_value`). + This does not use the real unquoting but what browsers are actually + using for quoting. + + :param value: the header value to unquote. + """ + if value and value[0] == value[-1] == '"': + # this is not the real unquoting, but fixing this so that the + # RFC is met will result in bugs with internet explorer and + # probably some other browsers as well. IE for example is + # uploading files with "C:\foo\bar.txt" as filename + value = value[1:-1] + + # if this is a filename and the starting characters look like + # a UNC path, then just return the value without quotes. 
Using the + # replace sequence below on a UNC path has the effect of turning + # the leading double slash into a single slash and then + # _fix_ie_filename() doesn't work correctly. See #458. + if not is_filename or value[:2] != '\\\\': + return value.replace('\\\\', '\\').replace('\\"', '"') + return value + + +def header_expand(headers): + """Returns an HTTP Header value string from a dictionary. + + Example expansion:: + + {'text/x-dvi': {'q': '.8', 'mxb': '100000', 'mxt': '5.0'}, 'text/x-c': {}} + # Accept: text/x-dvi; q=.8; mxb=100000; mxt=5.0, text/x-c + + (('text/x-dvi', {'q': '.8', 'mxb': '100000', 'mxt': '5.0'}), ('text/x-c', {})) + # Accept: text/x-dvi; q=.8; mxb=100000; mxt=5.0, text/x-c + """ + + collector = [] + + if isinstance(headers, dict): + headers = headers.items() + + elif isinstance(headers, basestring): + return headers + + for i, (value, params) in enumerate(headers): + + _params = [] + + for (p_k, p_v) in params.items(): + + _params.append('%s=%s' % (p_k, p_v)) + + collector.append(value) + collector.append('; ') + + if len(params): + + collector.append('; '.join(_params)) + + if not len(headers) == i+1: + collector.append(', ') + + + # Remove trailing separators. + if collector[-1] in (', ', '; '): + del collector[-1] + + return ''.join(collector) + + + +def randombytes(n): + """Return n random bytes.""" + # Use /dev/urandom if it is available. Fall back to random module + # if not. It might be worthwhile to extend this function to use + # other platform-specific mechanisms for getting random bytes. + if os.path.exists("/dev/urandom"): + f = open("/dev/urandom") + s = f.read(n) + f.close() + return s + else: + L = [chr(random.randrange(0, 256)) for i in range(n)] + return "".join(L) + + +def dict_from_cookiejar(cj): + """Returns a key/value dictionary from a CookieJar. + + :param cj: CookieJar object to extract cookies from. + """ + + cookie_dict = {} + + for _, cookies in cj._cookies.items(): + for _, cookies in cookies.items(): + for cookie in cookies.values(): + # print cookie + cookie_dict[cookie.name] = cookie.value + + return cookie_dict + + +def cookiejar_from_dict(cookie_dict): + """Returns a CookieJar from a key/value dictionary. + + :param cookie_dict: Dict of key/values to insert into CookieJar. + """ + + # return cookiejar if one was passed in + if isinstance(cookie_dict, cookielib.CookieJar): + return cookie_dict + + # create cookiejar + cj = cookielib.CookieJar() + + cj = add_dict_to_cookiejar(cj, cookie_dict) + + return cj + + +def add_dict_to_cookiejar(cj, cookie_dict): + """Returns a CookieJar from a key/value dictionary. + + :param cj: CookieJar to insert cookies into. + :param cookie_dict: Dict of key/values to insert into CookieJar. + """ + + for k, v in cookie_dict.items(): + + cookie = cookielib.Cookie( + version=0, + name=k, + value=v, + port=None, + port_specified=False, + domain='', + domain_specified=False, + domain_initial_dot=False, + path='/', + path_specified=True, + secure=False, + expires=None, + discard=True, + comment=None, + comment_url=None, + rest={'HttpOnly': None}, + rfc2109=False + ) + + # add cookie to cookiejar + cj.set_cookie(cookie) + + return cj + + +def get_encodings_from_content(content): + """Returns encodings from given content string. + + :param content: bytestring to extract encodings from. + """ + + charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I) + + return charset_re.findall(content) + + +def get_encoding_from_headers(headers): + """Returns encodings from given HTTP Header Dict. 
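+
+    For example (a sketch based on the parsing below), a header dict of
+    ``{'content-type': 'text/html; charset=utf-8'}`` yields ``'utf-8'``, and a
+    bare ``'text/html'`` falls back to ``'ISO-8859-1'``.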
+
+    :param headers: dictionary to extract encoding from.
+    """
+
+    content_type = headers.get('content-type')
+
+    if not content_type:
+        return None
+
+    content_type, params = cgi.parse_header(content_type)
+
+    if 'charset' in params:
+        return params['charset'].strip("'\"")
+
+    if 'text' in content_type:
+        return 'ISO-8859-1'
+
+
+def unicode_from_html(content):
+    """Attempts to decode an HTML string into unicode.
+    If unsuccessful, the original content is returned.
+    """
+
+    encodings = get_encodings_from_content(content)
+
+    for encoding in encodings:
+
+        try:
+            return unicode(content, encoding)
+        except (UnicodeError, TypeError):
+            pass
+
+    return content
+
+
+def stream_decode_response_unicode(iterator, r):
+    """Stream decodes an iterator."""
+
+    if r.encoding is None:
+        for item in iterator:
+            yield item
+        return
+
+    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
+    for chunk in iterator:
+        rv = decoder.decode(chunk)
+        if rv:
+            yield rv
+    rv = decoder.decode('', final=True)
+    if rv:
+        yield rv
+
+
+def get_unicode_from_response(r):
+    """Returns the requested content back in unicode.
+
+    :param r: Response object to get unicode content from.
+
+    Tried:
+
+    1. charset from content-type
+
+    2. every encoding from ``<meta ... charset=XXX>``
+
+    3. fall back and replace all unicode characters
+
+    """
+
+    tried_encodings = []
+
+    # Try charset from content-type
+    encoding = get_encoding_from_headers(r.headers)
+
+    if encoding:
+        try:
+            return unicode(r.content, encoding)
+        except UnicodeError:
+            tried_encodings.append(encoding)
+
+    # Fall back:
+    try:
+        return unicode(r.content, encoding, errors='replace')
+    except TypeError:
+        return r.content
+
+
+def decode_gzip(content):
+    """Return gzip-decoded string.
+
+    :param content: bytestring to gzip-decode.
+    """
+
+    return zlib.decompress(content, 16 + zlib.MAX_WBITS)
+
+
+def stream_decompress(iterator, mode='gzip'):
+    """
+    Stream decodes an iterator over compressed data
+
+    :param iterator: An iterator over compressed data
+    :param mode: 'gzip' or 'deflate'
+    :return: An iterator over decompressed data
+    """
+
+    if mode not in ['gzip', 'deflate']:
+        raise ValueError('stream_decompress mode must be gzip or deflate')
+
+    zlib_mode = 16 + zlib.MAX_WBITS if mode == 'gzip' else -zlib.MAX_WBITS
+    dec = zlib.decompressobj(zlib_mode)
+    try:
+        for chunk in iterator:
+            rv = dec.decompress(chunk)
+            if rv:
+                yield rv
+    except zlib.error:
+        # If there was an error decompressing, just return the raw chunk
+        yield chunk
+        # Continue to return the rest of the raw data
+        for chunk in iterator:
+            yield chunk
+    else:
+        # Make sure everything has been returned from the decompression object
+        buf = dec.decompress('')
+        rv = buf + dec.flush()
+        if rv:
+            yield rv
+
+
+def requote_path(path):
+    """Re-quote the given URL path component.
+
+    This function passes the given path through an unquote/quote cycle to
+    ensure that it is fully and consistently quoted.
+    """
+    parts = path.split("/")
+    parts = (urllib.quote(urllib.unquote(part), safe="") for part in parts)
+    return "/".join(parts)
diff --git a/simplejson/__init__.py b/simplejson/__init__.py
new file mode 100644
index 0000000..7fc8153
--- /dev/null
+++ b/simplejson/__init__.py
@@ -0,0 +1,547 @@
+r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
+JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
+interchange format.
+
+:mod:`simplejson` exposes an API familiar to users of the standard library
+:mod:`marshal` and :mod:`pickle` modules.
It is the externally maintained
+version of the :mod:`json` library contained in Python 2.6, but maintains
+compatibility with Python 2.4 and Python 2.5 and (currently) has
+significant performance advantages, even without using the optional C
+extension for speedups.
+
+Encoding basic Python object hierarchies::
+
+    >>> import simplejson as json
+    >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
+    '["foo", {"bar": ["baz", null, 1.0, 2]}]'
+    >>> print(json.dumps("\"foo\bar"))
+    "\"foo\bar"
+    >>> print(json.dumps(u'\u1234'))
+    "\u1234"
+    >>> print(json.dumps('\\'))
+    "\\"
+    >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True))
+    {"a": 0, "b": 0, "c": 0}
+    >>> from simplejson.compat import StringIO
+    >>> io = StringIO()
+    >>> json.dump(['streaming API'], io)
+    >>> io.getvalue()
+    '["streaming API"]'
+
+Compact encoding::
+
+    >>> import simplejson as json
+    >>> obj = [1,2,3,{'4': 5, '6': 7}]
+    >>> json.dumps(obj, separators=(',',':'), sort_keys=True)
+    '[1,2,3,{"4":5,"6":7}]'
+
+Pretty printing::
+
+    >>> import simplejson as json
+    >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent='    '))
+    {
+        "4": 5,
+        "6": 7
+    }
+
+Decoding JSON::
+
+    >>> import simplejson as json
+    >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
+    >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
+    True
+    >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
+    True
+    >>> from simplejson.compat import StringIO
+    >>> io = StringIO('["streaming API"]')
+    >>> json.load(io)[0] == 'streaming API'
+    True
+
+Specializing JSON object decoding::
+
+    >>> import simplejson as json
+    >>> def as_complex(dct):
+    ...     if '__complex__' in dct:
+    ...         return complex(dct['real'], dct['imag'])
+    ...     return dct
+    ...
+    >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
+    ...     object_hook=as_complex)
+    (1+2j)
+    >>> from decimal import Decimal
+    >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
+    True
+
+Specializing JSON object encoding::
+
+    >>> import simplejson as json
+    >>> def encode_complex(obj):
+    ...     if isinstance(obj, complex):
+    ...         return [obj.real, obj.imag]
+    ...     raise TypeError(repr(obj) + " is not JSON serializable")
+    ...
+    >>> json.dumps(2 + 1j, default=encode_complex)
+    '[2.0, 1.0]'
+    >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
+    '[2.0, 1.0]'
+    >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
+    '[2.0, 1.0]'
+
+
+Using simplejson.tool from the shell to validate and pretty-print::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name: line 1 column 3 (char 2)
+"""
+from __future__ import absolute_import
+__version__ = '3.3.0'
+__all__ = [
+    'dump', 'dumps', 'load', 'loads',
+    'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
+    'OrderedDict', 'simple_first',
+]
+
+__author__ = 'Bob Ippolito <bob@redivi.com>'
+
+from decimal import Decimal
+
+from .scanner import JSONDecodeError
+from .decoder import JSONDecoder
+from .encoder import JSONEncoder, JSONEncoderForHTML
+def _import_OrderedDict():
+    import collections
+    try:
+        return collections.OrderedDict
+    except AttributeError:
+        from .
import ordered_dict
+        return ordered_dict.OrderedDict
+OrderedDict = _import_OrderedDict()
+
+def _import_c_make_encoder():
+    try:
+        from ._speedups import make_encoder
+        return make_encoder
+    except ImportError:
+        return None
+
+_default_encoder = JSONEncoder(
+    skipkeys=False,
+    ensure_ascii=True,
+    check_circular=True,
+    allow_nan=True,
+    indent=None,
+    separators=None,
+    encoding='utf-8',
+    default=None,
+    use_decimal=True,
+    namedtuple_as_object=True,
+    tuple_as_array=True,
+    bigint_as_string=False,
+    item_sort_key=None,
+    for_json=False,
+    ignore_nan=False,
+)
+
+def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
+         allow_nan=True, cls=None, indent=None, separators=None,
+         encoding='utf-8', default=None, use_decimal=True,
+         namedtuple_as_object=True, tuple_as_array=True,
+         bigint_as_string=False, sort_keys=False, item_sort_key=None,
+         for_json=False, ignore_nan=False, **kw):
+    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
+    ``.write()``-supporting file-like object).
+
+    If *skipkeys* is true then ``dict`` keys that are not basic types
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+    will be skipped instead of raising a ``TypeError``.
+
+    If *ensure_ascii* is false, then some chunks written to ``fp``
+    may be ``unicode`` instances, subject to normal Python ``str`` to
+    ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
+    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
+    to cause an error.
+
+    If *check_circular* is false, then the circular reference check
+    for container types will be skipped and a circular reference will
+    result in an ``OverflowError`` (or worse).
+
+    If *allow_nan* is false, then it will be a ``ValueError`` to
+    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
+    in strict compliance of the original JSON specification, instead of using
+    the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See
+    *ignore_nan* for ECMA-262 compliant behavior.
+
+    If *indent* is a string, then JSON array elements and object members
+    will be pretty-printed with a newline followed by that string repeated
+    for each level of nesting. ``None`` (the default) selects the most compact
+    representation without any newlines. For backwards compatibility with
+    versions of simplejson earlier than 2.1.0, an integer is also accepted
+    and is converted to a string with that many spaces.
+
+    If specified, *separators* should be an
+    ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
+    if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
+    compact JSON representation, you should specify ``(',', ':')`` to eliminate
+    whitespace.
+
+    *encoding* is the character encoding for str instances, default is UTF-8.
+
+    *default(obj)* is a function that should return a serializable version
+    of obj or raise ``TypeError``. The default simply raises ``TypeError``.
+
+    If *use_decimal* is true (default: ``True``) then decimal.Decimal
+    will be natively serialized to JSON with full precision.
+
+    If *namedtuple_as_object* is true (default: ``True``),
+    :class:`tuple` subclasses with ``_asdict()`` methods will be encoded
+    as JSON objects.
+
+    If *tuple_as_array* is true (default: ``True``),
+    :class:`tuple` (and subclasses) will be encoded as JSON arrays.
+
+    If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher
+    or lower than -2**53 will be encoded as strings. This is to avoid the
This is to avoid the + rounding that happens in Javascript otherwise. Note that this is still a + lossy operation that will not round-trip correctly and should be used + sparingly. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precedence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead + of subclassing whenever possible. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array + and not bigint_as_string and not item_sort_key + and not for_json and not ignore_nan and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is false then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). 
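+
+    For example (a sketch), with ``allow_nan`` left true,
+    ``dumps(float('inf'))`` produces ``'Infinity'``.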
+
+    If ``indent`` is a string, then JSON array elements and object members
+    will be pretty-printed with a newline followed by that string repeated
+    for each level of nesting. ``None`` (the default) selects the most compact
+    representation without any newlines. For backwards compatibility with
+    versions of simplejson earlier than 2.1.0, an integer is also accepted
+    and is converted to a string with that many spaces.
+
+    If specified, ``separators`` should be an
+    ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
+    if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
+    compact JSON representation, you should specify ``(',', ':')`` to eliminate
+    whitespace.
+
+    ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+    ``default(obj)`` is a function that should return a serializable version
+    of obj or raise TypeError. The default simply raises TypeError.
+
+    If *use_decimal* is true (default: ``True``) then decimal.Decimal
+    will be natively serialized to JSON with full precision.
+
+    If *namedtuple_as_object* is true (default: ``True``),
+    :class:`tuple` subclasses with ``_asdict()`` methods will be encoded
+    as JSON objects.
+
+    If *tuple_as_array* is true (default: ``True``),
+    :class:`tuple` (and subclasses) will be encoded as JSON arrays.
+
+    If *bigint_as_string* is true (not the default), ints 2**53 and higher
+    or lower than -2**53 will be encoded as strings. This is to avoid the
+    rounding that happens in Javascript otherwise.
+
+    If specified, *item_sort_key* is a callable used to sort the items in
+    each dictionary. This is useful if you want to sort items other than
+    in alphabetical order by key. This option takes precedence over
+    *sort_keys*.
+
+    If *sort_keys* is true (default: ``False``), the output of dictionaries
+    will be sorted by item.
+
+    If *for_json* is true (default: ``False``), objects with a ``for_json()``
+    method will use the return value of that method for encoding as JSON
+    instead of the object.
+
+    If *ignore_nan* is true (default: ``False``), then out of range
+    :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
+    ``null`` in compliance with the ECMA-262 specification. If true, this will
+    override *allow_nan*.
+
+    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+    ``.default()`` method to serialize additional types), specify it with
+    the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing
+    whenever possible.
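+
+    A compact-encoding sketch tying the options above together::
+
+        >>> dumps({'b': 1, 'a': 2}, sort_keys=True, separators=(',', ':'))
+        '{"a":2,"b":1}'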
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None, + object_pairs_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, namedtuple_as_object=True, tuple_as_array=True, + **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as :class:`unicode`. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. 
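+
+    A round-trip sketch using the streaming API::
+
+        >>> from simplejson.compat import StringIO
+        >>> load(StringIO('{"key": "value"}')) == {'key': 'value'}
+        True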
+ + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, + use_decimal=use_decimal, **kw) + + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, **kw): + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as :class:`unicode`. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. + + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and object_pairs_hook is None + and not use_decimal and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + if use_decimal: + if parse_float is not None: + raise TypeError("use_decimal=True implies parse_float=Decimal") + kw['parse_float'] = Decimal + return cls(encoding=encoding, **kw).decode(s) + + +def _toggle_speedups(enabled): + from . 
import decoder as dec + from . import encoder as enc + from . import scanner as scan + c_make_encoder = _import_c_make_encoder() + if enabled: + dec.scanstring = dec.c_scanstring or dec.py_scanstring + enc.c_make_encoder = c_make_encoder + enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or + enc.py_encode_basestring_ascii) + scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner + else: + dec.scanstring = dec.py_scanstring + enc.c_make_encoder = None + enc.encode_basestring_ascii = enc.py_encode_basestring_ascii + scan.make_scanner = scan.py_make_scanner + dec.make_scanner = scan.make_scanner + global _default_decoder + _default_decoder = JSONDecoder( + encoding=None, + object_hook=None, + object_pairs_hook=None, + ) + global _default_encoder + _default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, + ) + +def simple_first(kv): + """Helper function to pass to item_sort_key to sort simple + elements to the top, then container elements. + """ + return (isinstance(kv[1], (list, dict, tuple)), kv[0]) diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c new file mode 100644 index 0000000..e888873 --- /dev/null +++ b/simplejson/_speedups.c @@ -0,0 +1,3296 @@ +/* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */ +#include "Python.h" +#include "structmember.h" + +#if PY_MAJOR_VERSION >= 3 +#define PyInt_FromSsize_t PyLong_FromSsize_t +#define PyInt_AsSsize_t PyLong_AsSsize_t +#define PyString_Check PyBytes_Check +#define PyString_GET_SIZE PyBytes_GET_SIZE +#define PyString_AS_STRING PyBytes_AS_STRING +#define PyString_FromStringAndSize PyBytes_FromStringAndSize +#define PyInt_Check(obj) 0 +#define JSON_UNICHR Py_UCS4 +#define JSON_InternFromString PyUnicode_InternFromString +#define JSON_Intern_GET_SIZE PyUnicode_GET_SIZE +#define JSON_ASCII_Check PyUnicode_Check +#define JSON_ASCII_AS_STRING PyUnicode_AsUTF8 +#define PyInt_Type PyLong_Type +#define PyInt_FromString PyLong_FromString +#define PY2_UNUSED +#define PY3_UNUSED UNUSED +#define JSON_NewEmptyUnicode() PyUnicode_New(0, 127) +#else /* PY_MAJOR_VERSION >= 3 */ +#define PY2_UNUSED UNUSED +#define PY3_UNUSED +#define PyUnicode_READY(obj) 0 +#define PyUnicode_KIND(obj) (sizeof(Py_UNICODE)) +#define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj))) +#define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)]) +#define PyUnicode_GetLength PyUnicode_GET_SIZE +#define JSON_UNICHR Py_UNICODE +#define JSON_ASCII_Check PyString_Check +#define JSON_ASCII_AS_STRING PyString_AS_STRING +#define JSON_InternFromString PyString_InternFromString +#define JSON_Intern_GET_SIZE PyString_GET_SIZE +#define JSON_NewEmptyUnicode() PyUnicode_FromUnicode(NULL, 0) +#endif /* PY_MAJOR_VERSION < 3 */ + +#if PY_VERSION_HEX < 0x02070000 +#if !defined(PyOS_string_to_double) +#define PyOS_string_to_double json_PyOS_string_to_double +static double +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception); +static double +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) +{ + double x; + assert(endptr == NULL); + assert(overflow_exception == NULL); + PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;) + x = PyOS_ascii_atof(s); + PyFPE_END_PROTECT(x) + return x; +} +#endif +#endif /* PY_VERSION_HEX < 0x02070000 */ + +#if PY_VERSION_HEX < 0x02060000 +#if !defined(Py_TYPE) +#define Py_TYPE(ob) 
(((PyObject*)(ob))->ob_type) +#endif +#if !defined(Py_SIZE) +#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) +#endif +#if !defined(PyVarObject_HEAD_INIT) +#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, +#endif +#endif /* PY_VERSION_HEX < 0x02060000 */ + +#if PY_VERSION_HEX < 0x02050000 +#if !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#define PyInt_FromSsize_t PyInt_FromLong +#define PyInt_AsSsize_t PyInt_AsLong +#endif +#if !defined(Py_IS_FINITE) +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif +#endif /* PY_VERSION_HEX < 0x02050000 */ + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +#define JSON_ALLOW_NAN 1 +#define JSON_IGNORE_NAN 2 + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct { + PyObject *large_strings; /* A list of previously accumulated large strings */ + PyObject *small_strings; /* Pending small strings */ +} JSON_Accu; + +static int +JSON_Accu_Init(JSON_Accu *acc); +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode); +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc); +static void +JSON_Accu_Destroy(JSON_Accu *acc); + +#define ERR_EXPECTING_VALUE "Expecting value" +#define ERR_ARRAY_DELIMITER "Expecting ',' delimiter or ']'" +#define ERR_ARRAY_VALUE_FIRST "Expecting value or ']'" +#define ERR_OBJECT_DELIMITER "Expecting ',' delimiter or '}'" +#define ERR_OBJECT_PROPERTY "Expecting property name enclosed in double quotes" +#define ERR_OBJECT_PROPERTY_FIRST "Expecting property name enclosed in double quotes or '}'" +#define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter" +#define ERR_STRING_UNTERMINATED "Unterminated string starting at" +#define ERR_STRING_CONTROL "Invalid control character %r at" +#define ERR_STRING_ESC1 "Invalid \\X escape sequence %r" +#define ERR_STRING_ESC4 "Invalid \\uXXXX escape sequence" + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict; + PyObject *object_hook; + PyObject *pairs_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; + PyObject *memo; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, + {NULL} +}; + +typedef struct _PyEncoderObject { + PyObject_HEAD + PyObject *markers; + PyObject *defaultfn; + PyObject *encoder; + PyObject *indent; + PyObject *key_separator; + PyObject *item_separator; + PyObject *sort_keys; + PyObject *key_memo; + PyObject *encoding; + PyObject *Decimal; + 
    PyObject *skipkeys_bool;
+    int skipkeys;
+    int fast_encode;
+    /* 0, JSON_ALLOW_NAN, JSON_IGNORE_NAN */
+    int allow_or_ignore_nan;
+    int use_decimal;
+    int namedtuple_as_object;
+    int tuple_as_array;
+    int bigint_as_string;
+    PyObject *item_sort_key;
+    PyObject *item_sort_kw;
+    int for_json;
+} PyEncoderObject;
+
+static PyMemberDef encoder_members[] = {
+    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
+    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
+    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
+    {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoding), READONLY, "encoding"},
+    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
+    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
+    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
+    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
+    /* Python 2.5 does not support T_BOOL */
+    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"},
+    {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
+    {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"},
+    {NULL}
+};
+
+static PyObject *
+join_list_unicode(PyObject *lst);
+static PyObject *
+JSON_ParseEncoding(PyObject *encoding);
+static PyObject *
+JSON_UnicodeFromChar(JSON_UNICHR c);
+static PyObject *
+maybe_quote_bigint(PyObject *encoded, PyObject *obj);
+static Py_ssize_t
+ascii_char_size(JSON_UNICHR c);
+static Py_ssize_t
+ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars);
+static PyObject *
+ascii_escape_unicode(PyObject *pystr);
+static PyObject *
+ascii_escape_str(PyObject *pystr);
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
+#if PY_MAJOR_VERSION < 3
+static PyObject *
+join_list_string(PyObject *lst);
+static PyObject *
+scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr);
+static PyObject *
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+#endif
+static PyObject *
+scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr);
+static PyObject *
+scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
+static PyObject *
+scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+scanner_dealloc(PyObject *self);
+static int
+scanner_clear(PyObject *self);
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+encoder_dealloc(PyObject *self);
+static int
+encoder_clear(PyObject *self);
+static PyObject *
+encoder_stringify_key(PyEncoderObject *s, PyObject *key);
+static int
+encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level);
+static int
+encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level);
+static int
+encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level); +static PyObject * +_encoded_const(PyObject *obj); +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj); +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj); +static int +_is_namedtuple(PyObject *obj); +static int +_has_for_json_hook(PyObject *obj); +static PyObject * +moduleinit(void); + +#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') +#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) + +#define MIN_EXPANSION 6 + +static int +JSON_Accu_Init(JSON_Accu *acc) +{ + /* Lazily allocated */ + acc->large_strings = NULL; + acc->small_strings = PyList_New(0); + if (acc->small_strings == NULL) + return -1; + return 0; +} + +static int +flush_accumulator(JSON_Accu *acc) +{ + Py_ssize_t nsmall = PyList_GET_SIZE(acc->small_strings); + if (nsmall) { + int ret; + PyObject *joined; + if (acc->large_strings == NULL) { + acc->large_strings = PyList_New(0); + if (acc->large_strings == NULL) + return -1; + } +#if PY_MAJOR_VERSION >= 3 + joined = join_list_unicode(acc->small_strings); +#else /* PY_MAJOR_VERSION >= 3 */ + joined = join_list_string(acc->small_strings); +#endif /* PY_MAJOR_VERSION < 3 */ + if (joined == NULL) + return -1; + if (PyList_SetSlice(acc->small_strings, 0, nsmall, NULL)) { + Py_DECREF(joined); + return -1; + } + ret = PyList_Append(acc->large_strings, joined); + Py_DECREF(joined); + return ret; + } + return 0; +} + +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode) +{ + Py_ssize_t nsmall; +#if PY_MAJOR_VERSION >= 3 + assert(PyUnicode_Check(unicode)); +#else /* PY_MAJOR_VERSION >= 3 */ + assert(JSON_ASCII_Check(unicode) || PyUnicode_Check(unicode)); +#endif /* PY_MAJOR_VERSION < 3 */ + + if (PyList_Append(acc->small_strings, unicode)) + return -1; + nsmall = PyList_GET_SIZE(acc->small_strings); + /* Each item in a list of unicode objects has an overhead (in 64-bit + * builds) of: + * - 8 bytes for the list slot + * - 56 bytes for the header of the unicode object + * that is, 64 bytes. 100000 such objects waste more than 6MB + * compared to a single concatenated string. 
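+     * (Worked out: 100000 slots * 64 bytes = 6,400,000 bytes, roughly
+     * 6.1 MiB, which is where the 100000-item flush threshold checked
+     * below comes from.)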
+ */ + if (nsmall < 100000) + return 0; + return flush_accumulator(acc); +} + +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc) +{ + int ret; + PyObject *res; + + ret = flush_accumulator(acc); + Py_CLEAR(acc->small_strings); + if (ret) { + Py_CLEAR(acc->large_strings); + return NULL; + } + res = acc->large_strings; + acc->large_strings = NULL; + if (res == NULL) + return PyList_New(0); + return res; +} + +static void +JSON_Accu_Destroy(JSON_Accu *acc) +{ + Py_CLEAR(acc->small_strings); + Py_CLEAR(acc->large_strings); +} + +static int +IS_DIGIT(JSON_UNICHR c) +{ + return c >= '0' && c <= '9'; +} + +static PyObject * +JSON_UnicodeFromChar(JSON_UNICHR c) +{ +#if PY_MAJOR_VERSION >= 3 + PyObject *rval = PyUnicode_New(1, c); + if (rval) + PyUnicode_WRITE(PyUnicode_KIND(rval), PyUnicode_DATA(rval), 0, c); + return rval; +#else /* PY_MAJOR_VERSION >= 3 */ + return PyUnicode_FromUnicode(&c, 1); +#endif /* PY_MAJOR_VERSION < 3 */ +} + +static PyObject * +maybe_quote_bigint(PyObject *encoded, PyObject *obj) +{ + static PyObject *big_long = NULL; + static PyObject *small_long = NULL; + if (big_long == NULL) { + big_long = PyLong_FromLongLong(1LL << 53); + if (big_long == NULL) { + Py_DECREF(encoded); + return NULL; + } + } + if (small_long == NULL) { + small_long = PyLong_FromLongLong(-1LL << 53); + if (small_long == NULL) { + Py_DECREF(encoded); + return NULL; + } + } + if (PyObject_RichCompareBool(obj, big_long, Py_GE) || + PyObject_RichCompareBool(obj, small_long, Py_LE)) { +#if PY_MAJOR_VERSION >= 3 + PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded); +#else + PyObject* quoted = PyString_FromFormat("\"%s\"", + PyString_AsString(encoded)); +#endif + Py_DECREF(encoded); + encoded = quoted; + } + return encoded; +} + +static int +_is_namedtuple(PyObject *obj) +{ + int rval = 0; + PyObject *_asdict = PyObject_GetAttrString(obj, "_asdict"); + if (_asdict == NULL) { + PyErr_Clear(); + return 0; + } + rval = PyCallable_Check(_asdict); + Py_DECREF(_asdict); + return rval; +} + +static int +_has_for_json_hook(PyObject *obj) +{ + int rval = 0; + PyObject *for_json = PyObject_GetAttrString(obj, "for_json"); + if (for_json == NULL) { + PyErr_Clear(); + return 0; + } + rval = PyCallable_Check(for_json); + Py_DECREF(for_json); + return rval; +} + +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) +{ + /* PyObject to Py_ssize_t converter */ + *size_ptr = PyInt_AsSsize_t(o); + if (*size_ptr == -1 && PyErr_Occurred()) + return 0; + return 1; +} + +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) +{ + /* Py_ssize_t to PyObject converter */ + return PyInt_FromSsize_t(*size_ptr); +} + +static Py_ssize_t +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars) +{ + /* Escape unicode code point c to ASCII escape sequences + in char *output. 
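+       (For example, U+1F600, which is outside the BMP, is written as the
+       12-character surrogate-pair escape \ud83d\ude00.)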
output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + JSON_UNICHR v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + } + return chars; +} + +static Py_ssize_t +ascii_char_size(JSON_UNICHR c) +{ + if (S_CHAR(c)) { + return 1; + } + else if (c == '\\' || + c == '"' || + c == '\b' || + c == '\f' || + c == '\n' || + c == '\r' || + c == '\t') { + return 2; + } +#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 + else if (c >= 0x10000U) { + return 2 * MIN_EXPANSION; + } +#endif + else { + return MIN_EXPANSION; + } +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PY2_UNUSED int kind; + void *data; + PyObject *rval; + char *output; + + if (PyUnicode_READY(pystr)) + return NULL; + + kind = PyUnicode_KIND(pystr); + data = PyUnicode_DATA(pystr); + input_chars = PyUnicode_GetLength(pystr); + output_size = 2; + for (i = 0; i < input_chars; i++) { + output_size += ascii_char_size(PyUnicode_READ(kind, data, i)); + } +#if PY_MAJOR_VERSION >= 3 + rval = PyUnicode_New(output_size, 127); + if (rval == NULL) { + return NULL; + } + assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND); + output = (char *)PyUnicode_DATA(rval); +#else + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); +#endif + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars); + } + output[chars++] = '"'; + assert(chars == output_size); + return rval; +} + +#if PY_MAJOR_VERSION >= 3 + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + PyObject *rval; + PyObject *input = PyUnicode_DecodeUTF8(PyString_AS_STRING(pystr), PyString_GET_SIZE(pystr), NULL); + if (input == NULL) + return NULL; + rval = ascii_escape_unicode(input); + Py_DECREF(input); + return rval; +} + +#else /* PY_MAJOR_VERSION >= 3 */ + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + 
output_size = 2; + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + JSON_UNICHR c = (JSON_UNICHR)input_str[i]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + output_size += ascii_char_size(c); + } + + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + chars = 0; + output = PyString_AS_STRING(rval); + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars); + } + output[chars++] = '"'; + assert(chars == output_size); + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +encoder_stringify_key(PyEncoderObject *s, PyObject *key) +{ + if (PyUnicode_Check(key)) { + Py_INCREF(key); + return key; + } + else if (PyString_Check(key)) { +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_Decode( + PyString_AS_STRING(key), + PyString_GET_SIZE(key), + JSON_ASCII_AS_STRING(s->encoding), + NULL); +#else /* PY_MAJOR_VERSION >= 3 */ + Py_INCREF(key); + return key; +#endif /* PY_MAJOR_VERSION < 3 */ + } + else if (PyFloat_Check(key)) { + return encoder_encode_float(s, key); + } + else if (key == Py_True || key == Py_False || key == Py_None) { + /* This must come before the PyInt_Check because + True and False are also 1 and 0.*/ + return _encoded_const(key); + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + return PyObject_Str(key); + } + else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) { + return PyObject_Str(key); + } + else if (s->skipkeys) { + Py_INCREF(Py_None); + return Py_None; + } + PyErr_SetString(PyExc_TypeError, "keys must be a string"); + return NULL; +} + +static PyObject * +encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct) +{ + PyObject *items; + PyObject *iter = NULL; + PyObject *lst = NULL; + PyObject *item = NULL; + PyObject *kstr = NULL; + static PyObject *sortfun = NULL; + static PyObject *sortargs = NULL; + + if (sortargs == NULL) { + sortargs = PyTuple_New(0); + if (sortargs == NULL) + return NULL; + } + + if (PyDict_CheckExact(dct)) + items = PyDict_Items(dct); + else + items = PyMapping_Items(dct); + if (items == NULL) + return NULL; + iter = PyObject_GetIter(items); + Py_DECREF(items); + if (iter == NULL) + return NULL; + if (s->item_sort_kw == Py_None) + return iter; + lst = PyList_New(0); + if (lst == NULL) + goto bail; + while ((item = PyIter_Next(iter))) { + PyObject *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(key)) { + /* item can be added as-is */ + } +#endif /* PY_MAJOR_VERSION < 3 */ + else if (PyUnicode_Check(key)) { + /* item can be added as-is */ + } + else { + PyObject *tpl; + kstr = encoder_stringify_key(s, key); + if (kstr == NULL) + goto bail; + else if (kstr == Py_None) { + /* skipkeys */ + Py_DECREF(kstr); + continue; + } + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; + tpl = PyTuple_Pack(2, kstr, value); + if (tpl == NULL) + goto bail; + Py_CLEAR(kstr); + Py_DECREF(item); + item = tpl; + } + if (PyList_Append(lst, item)) + goto bail; + Py_DECREF(item); + } + Py_CLEAR(iter); + if 
(PyErr_Occurred()) + goto bail; + sortfun = PyObject_GetAttrString(lst, "sort"); + if (sortfun == NULL) + goto bail; + if (!PyObject_Call(sortfun, sortargs, s->item_sort_kw)) + goto bail; + Py_CLEAR(sortfun); + iter = PyObject_GetIter(lst); + Py_CLEAR(lst); + return iter; +bail: + Py_XDECREF(sortfun); + Py_XDECREF(kstr); + Py_XDECREF(item); + Py_XDECREF(lst); + Py_XDECREF(iter); + return NULL; +} + +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */ + static PyObject *JSONDecodeError = NULL; + PyObject *exc; + if (JSONDecodeError == NULL) { + PyObject *scanner = PyImport_ImportModule("simplejson.scanner"); + if (scanner == NULL) + return; + JSONDecodeError = PyObject_GetAttrString(scanner, "JSONDecodeError"); + Py_DECREF(scanner); + if (JSONDecodeError == NULL) + return; + } + exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (exc) { + PyErr_SetObject(JSONDecodeError, exc); + Py_DECREF(exc); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = JSON_NewEmptyUnicode(); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +#if PY_MAJOR_VERSION >= 3 +#define join_list_string join_list_unicode +#else /* PY_MAJOR_VERSION >= 3 */ +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyString_FromStringAndSize(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) +{ + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + assert(PyErr_Occurred()); + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if (pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +#define APPEND_OLD_CHUNK \ + if (chunk != NULL) { \ + if (chunks == NULL) { \ + chunks = PyList_New(0); \ + if (chunks == NULL) { \ + goto bail; \ + } \ + } \ + if (PyList_Append(chunks, chunk)) { \ + goto bail; \ + } \ + Py_CLEAR(chunk); \ + } + +#if PY_MAJOR_VERSION < 3 +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. 
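+       (For example, for the five-character input '"abc"' the caller passes
+       end == 1 and, on success, *next_end_ptr comes back as 5.)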
+ encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = NULL; + PyObject *chunk = NULL; + PyObject *strchunk = NULL; + + if (len == end) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + else if (end < 0 || len < end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg(ERR_STRING_CONTROL, pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION >= 3 + if (!has_unicode) { + chunk = PyUnicode_DecodeASCII(&buf[end], next - end, NULL); + } + else { + chunk = PyUnicode_Decode(&buf[end], next - end, encoding, NULL); + } + if (chunk == NULL) { + goto bail; + } +#else /* PY_MAJOR_VERSION >= 3 */ + strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } +#endif /* PY_MAJOR_VERSION < 3 */ + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + JSON_UNICHR digit = (JSON_UNICHR)buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } +#if (PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)) + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') { + JSON_UNICHR c2 = 0; + end += 6; + /* Decode 4 hex digits */ + for (next += 2; next < end; next++) { + c2 <<= 4; + JSON_UNICHR digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + 
case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + /* not a low surrogate, rewind */ + end -= 6; + next = end; + } + else { + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + } + } +#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */ + } + if (c > 0x7f) { + has_unicode = 1; + } + APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION >= 3 + chunk = JSON_UnicodeFromChar(c); + if (chunk == NULL) { + goto bail; + } +#else /* PY_MAJOR_VERSION >= 3 */ + if (has_unicode) { + chunk = JSON_UnicodeFromChar(c); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } +#endif + } + + if (chunks == NULL) { + if (chunk != NULL) + rval = chunk; + else + rval = JSON_NewEmptyUnicode(); + } + else { + APPEND_OLD_CHUNK + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + } + + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunk); + Py_XDECREF(chunks); + return NULL; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + Py_ssize_t len = PyUnicode_GetLength(pystr); + void *buf = PyUnicode_DATA(pystr); + PyObject *chunks = NULL; + PyObject *chunk = NULL; + + if (len == end) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + else if (end < 0 || len < end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + JSON_UNICHR c = 0; + for (next = end; next < len; next++) { + c = PyUnicode_READ(kind, buf, next); + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg(ERR_STRING_CONTROL, pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION < 3 + chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end); +#else + chunk = PyUnicode_Substring(pystr, end, next); +#endif + if (chunk == NULL) { + goto bail; + } + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + c = PyUnicode_READ(kind, buf, next); + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 
0) { + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } +#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE) + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + JSON_UNICHR c2 = 0; + if (end + 6 < len && + PyUnicode_READ(kind, buf, next) == '\\' && + PyUnicode_READ(kind, buf, next + 1) == 'u') { + end += 6; + /* Decode 4 hex digits */ + for (next += 2; next < end; next++) { + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); + c2 <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + /* not a low surrogate, rewind */ + end -= 6; + next = end; + } + else { + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + } + } +#endif + } + APPEND_OLD_CHUNK + chunk = JSON_UnicodeFromChar(c); + if (chunk == NULL) { + goto bail; + } + } + + if (chunks == NULL) { + if (chunk != NULL) + rval = chunk; + else + rval = JSON_NewEmptyUnicode(); + } + else { + APPEND_OLD_CHUNK + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + } + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunk); + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." +); + +static PyObject * +py_scanstring(PyObject* self UNUSED, PyObject *args) +{ + PyObject *pystr; + PyObject *rval; + Py_ssize_t end; + Py_ssize_t next_end = -1; + char *encoding = NULL; + int strict = 1; + if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + return NULL; + } + if (encoding == NULL) { + encoding = DEFAULT_ENCODING; + } + if (PyUnicode_Check(pystr)) { + rval = scanstring_unicode(pystr, end, strict, &next_end); + } +#if PY_MAJOR_VERSION < 3 + /* Using a bytes input is unsupported for scanning in Python 3. + It is coerced to str in the decoder before it gets here. 
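+       (So on Python 3 the PyUnicode branch above is the only string path;
+       the PyString branch below is compiled for Python 2 only.)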
*/ + else if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } +#endif + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_end); +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "Return an ASCII-only JSON representation of a Python string" +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) +{ + /* Return an ASCII-only JSON representation of a Python string */ + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } + else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } +} + +static void +scanner_dealloc(PyObject *self) +{ + /* Deallocate scanner object */ + scanner_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +scanner_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_VISIT(s->encoding); + Py_VISIT(s->strict); + Py_VISIT(s->object_hook); + Py_VISIT(s->pairs_hook); + Py_VISIT(s->parse_float); + Py_VISIT(s->parse_int); + Py_VISIT(s->parse_constant); + Py_VISIT(s->memo); + return 0; +} + +static int +scanner_clear(PyObject *self) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->pairs_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + Py_CLEAR(s->memo); + return 0; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON object from PyString pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. 
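+       (For example, with pystr '{"a": 1}' this is entered with idx == 1,
+       just past the '{', and on success *next_idx_ptr is set to 8, just
+       past the '}'.)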
+ + Returns a new PyObject (usually a dict, but object_hook or + object_pairs_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = NULL; + PyObject *pairs = NULL; + PyObject *item; + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = JSON_ASCII_AS_STRING(s->encoding); + int strict = PyObject_IsTrue(s->strict); + int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; + Py_ssize_t next_idx; + if (has_pairs_hook) { + pairs = PyList_New(0); + if (pairs == NULL) + return NULL; + } + else { + rval = PyDict_New(); + if (rval == NULL) + return NULL; + } + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + PyObject *memokey; + trailing_delimiter = 0; + + /* read key */ + if (str[idx] != '"') { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); + if (key == NULL) + goto bail; + memokey = PyDict_GetItem(s->memo, key); + if (memokey != NULL) { + Py_INCREF(memokey); + Py_DECREF(key); + key = memokey; + } + else { + if (PyDict_SetItem(s->memo, key, key) < 0) + goto bail; + } + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (has_pairs_hook) { + item = PyTuple_Pack(2, key, val); + if (item == NULL) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); + } + else { + if (PyDict_SetItem(rval, key, val) < 0) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + } + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + did_parse = 1; + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } + goto bail; + } + + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx 
+ 1; + return rval; +bail: + Py_XDECREF(rval); + Py_XDECREF(key); + Py_XDECREF(val); + Py_XDECREF(pairs); + return NULL; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + PyObject *rval = NULL; + PyObject *pairs = NULL; + PyObject *item; + PyObject *key = NULL; + PyObject *val = NULL; + int strict = PyObject_IsTrue(s->strict); + int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; + Py_ssize_t next_idx; + + if (has_pairs_hook) { + pairs = PyList_New(0); + if (pairs == NULL) + return NULL; + } + else { + rval = PyDict_New(); + if (rval == NULL) + return NULL; + } + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + PyObject *memokey; + trailing_delimiter = 0; + + /* read key */ + if (PyUnicode_READ(kind, str, idx) != '"') { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); + if (key == NULL) + goto bail; + memokey = PyDict_GetItem(s->memo, key); + if (memokey != NULL) { + Py_INCREF(memokey); + Py_DECREF(key); + key = memokey; + } + else { + if (PyDict_SetItem(s->memo, key, key) < 0) + goto bail; + } + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip + whitespace */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') { + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (has_pairs_hook) { + item = PyTuple_Pack(2, key, val); + if (item == NULL) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); + } + else { + if (PyDict_SetItem(rval, key, val) < 0) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + } + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* bail if the object is closed or we didn't get the , + delimiter */ + did_parse = 1; + if (idx > end_idx) break; + if (PyUnicode_READ(kind, str, idx) == '}') { + break; + } + else if (PyUnicode_READ(kind, str, idx) != ',') { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + } + + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || 
PyUnicode_READ(kind, str, idx) != '}') { + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } + goto bail; + } + + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(rval); + Py_XDECREF(key); + Py_XDECREF(val); + Py_XDECREF(pairs); + return NULL; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + trailing_delimiter = 0; + /* read any JSON term and de-tuplefy the (rval, idx) */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) { + goto bail; + } + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + if (PyList_GET_SIZE(rval)) { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. 
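+       (For example, for the six-character input '[1, 2]' this is entered
+       with idx == 1, just past the '[', and on success *next_idx_ptr is
+       set to 6, just past the ']'.)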
+ + Returns a new PyList + */ + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + trailing_delimiter = 0; + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) { + goto bail; + } + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (PyUnicode_READ(kind, str, idx) == ']') { + break; + } + else if (PyUnicode_READ(kind, str, idx) != ',') { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { + if (PyList_GET_SIZE(rval)) { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON constant from PyString pystr. + constant is the constant string that was found + ("NaN", "Infinity", "-Infinity"). + idx is the index of the first character of the constant + *next_idx_ptr is a return-by-reference index to the first character after + the constant. + + Returns the result of parse_constant + */ + PyObject *cstr; + PyObject *rval; + /* constant is "NaN", "Infinity", or "-Infinity" */ + cstr = JSON_InternFromString(constant); + if (cstr == NULL) + return NULL; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); + idx += JSON_Intern_GET_SIZE(cstr); + Py_DECREF(cstr); + *next_idx_ptr = idx; + return rval; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. 
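+       (For example, scanning "-1.5e3" at start == 0 consumes all six
+       characters and, with the default parse_float, yields the PyFloat
+       -1500.0.)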
+ May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + idx++; + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyString_FromStringAndSize(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */ + double d = PyOS_string_to_double(PyString_AS_STRING(numstr), + NULL, NULL); + if (d == -1.0 && PyErr_Occurred()) + return NULL; + rval = PyFloat_FromDouble(d); + } + } + else { + /* parse as an int using a fast path if available, otherwise call user defined method */ + if (s->parse_int != (PyObject *)&PyInt_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + else { + rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); + } + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON number from PyUnicode pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. 
+ May return other types if parse_int or parse_float are set + */ + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + JSON_UNICHR c; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (PyUnicode_READ(kind, str, idx) == '-') { + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + idx++; + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + c = PyUnicode_READ(kind, str, idx); + if (c == '0') { + /* if it starts with 0 we only expect one integer digit */ + idx++; + } + else if (IS_DIGIT(c)) { + idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) { + idx++; + } + } + else { + /* no integer digits, error */ + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && + PyUnicode_READ(kind, str, idx) == '.' && + IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) { + is_float = 1; + idx += 2; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == 'e' || + PyUnicode_READ(kind, str, idx) == 'E')) { + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == '-' || + PyUnicode_READ(kind, str, idx) == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; + + /* if we got a digit, then parse as float. if not, backtrack */ + if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ +#if PY_MAJOR_VERSION >= 3 + numstr = PyUnicode_Substring(pystr, start, idx); +#else + numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start); +#endif + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { +#if PY_MAJOR_VERSION >= 3 + rval = PyFloat_FromString(numstr); +#else + rval = PyFloat_FromString(numstr, NULL); +#endif + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. 
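+    Dispatch is on the first character: '"' starts a string, '{' an
+    object, '[' an array, 'n'/'t'/'f' the null/true/false literals,
+    'N'/'I'/'-' the extension constants NaN/Infinity/-Infinity, and
+    anything else falls through to the number matcher.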
+ */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + PyObject *rval = NULL; + int fallthrough = 0; + if (idx >= length) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + rval = scanstring_str(pystr, idx + 1, + JSON_ASCII_AS_STRING(s->encoding), + PyObject_IsTrue(s->strict), + next_idx_ptr); + break; + case '{': + /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a string")) + return NULL; + rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case '[': + /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a string")) + return NULL; + rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + rval = Py_None; + } + else + fallthrough = 1; + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + rval = Py_True; + } + else + fallthrough = 1; + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + rval = Py_False; + } + else + fallthrough = 1; + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + default: + fallthrough = 1; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + if (fallthrough) + rval = _match_number_str(s, pystr, idx, next_idx_ptr); + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ + + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. 
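+    Dispatch on the first character works exactly as in the PyString
+    variant above; only the character access (PyUnicode_READ) differs.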
+ */ + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t length = PyUnicode_GetLength(pystr); + PyObject *rval = NULL; + int fallthrough = 0; + if (idx >= length) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + switch (PyUnicode_READ(kind, str, idx)) { + case '"': + /* string */ + rval = scanstring_unicode(pystr, idx + 1, + PyObject_IsTrue(s->strict), + next_idx_ptr); + break; + case '{': + /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a unicode string")) + return NULL; + rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case '[': + /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a unicode string")) + return NULL; + rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case 'n': + /* null */ + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'u' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + rval = Py_None; + } + else + fallthrough = 1; + break; + case 't': + /* true */ + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'r' && + PyUnicode_READ(kind, str, idx + 2) == 'u' && + PyUnicode_READ(kind, str, idx + 3) == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + rval = Py_True; + } + else + fallthrough = 1; + break; + case 'f': + /* false */ + if ((idx + 4 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 's' && + PyUnicode_READ(kind, str, idx + 4) == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + rval = Py_False; + } + else + fallthrough = 1; + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'N') { + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'n' && + PyUnicode_READ(kind, str, idx + 2) == 'f' && + PyUnicode_READ(kind, str, idx + 3) == 'i' && + PyUnicode_READ(kind, str, idx + 4) == 'n' && + PyUnicode_READ(kind, str, idx + 5) == 'i' && + PyUnicode_READ(kind, str, idx + 6) == 't' && + PyUnicode_READ(kind, str, idx + 7) == 'y') { + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'I' && + PyUnicode_READ(kind, str, idx + 2) == 'n' && + PyUnicode_READ(kind, str, idx + 3) == 'f' && + PyUnicode_READ(kind, str, idx + 4) == 'i' && + PyUnicode_READ(kind, str, idx + 5) == 'n' && + PyUnicode_READ(kind, str, idx + 6) == 'i' && + PyUnicode_READ(kind, str, idx + 7) == 't' && + PyUnicode_READ(kind, str, idx + 8) == 'y') { + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + default: + fallthrough = 1; + } + /* Didn't find a string, object, array, or named constant. Look for a number. 
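+       A '-' that did not introduce "-Infinity" also lands here and is
+       consumed by _match_number_unicode.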
*/ + if (fallthrough) + rval = _match_number_unicode(s, pystr, idx, next_idx_ptr); + return rval; +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyUnicode_Check(pystr)) { + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } +#endif /* PY_MAJOR_VERSION < 3 */ + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + PyDict_Clear(s->memo); + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyScannerObject *s; + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->encoding = NULL; + s->strict = NULL; + s->object_hook = NULL; + s->pairs_hook = NULL; + s->parse_float = NULL; + s->parse_int = NULL; + s->parse_constant = NULL; + } + return (PyObject *)s; +} + +static PyObject * +JSON_ParseEncoding(PyObject *encoding) +{ + if (encoding == NULL) + return NULL; + if (encoding == Py_None) + return JSON_InternFromString(DEFAULT_ENCODING); +#if PY_MAJOR_VERSION < 3 + if (PyUnicode_Check(encoding)) + return PyUnicode_AsEncodedString(encoding, NULL, NULL); +#endif + if (JSON_ASCII_Check(encoding)) { + Py_INCREF(encoding); + return encoding; + } + PyErr_SetString(PyExc_TypeError, "encoding must be a string"); + return NULL; +} + +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + PyObject *encoding; + + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return -1; + + if (s->memo == NULL) { + s->memo = PyDict_New(); + if (s->memo == NULL) + goto bail; + } + + /* JSON_ASCII_AS_STRING is used on encoding */ + encoding = PyObject_GetAttrString(ctx, "encoding"); + s->encoding = JSON_ParseEncoding(encoding); + Py_XDECREF(encoding); + if (s->encoding == NULL) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict = PyObject_GetAttrString(ctx, "strict"); + if (s->strict == NULL) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); + if (s->pairs_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return 0; + +bail: + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->pairs_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return -1; +} + +PyDoc_STRVAR(scanner_doc, 
"JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyVarObject_HEAD_INIT(NULL, 0) + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + scanner_doc, /* tp_doc */ + scanner_traverse, /* tp_traverse */ + scanner_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + scanner_init, /* tp_init */ + 0,/* PyType_GenericAlloc, */ /* tp_alloc */ + scanner_new, /* tp_new */ + 0,/* PyObject_GC_Del, */ /* tp_free */ +}; + +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyEncoderObject *s; + s = (PyEncoderObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->markers = NULL; + s->defaultfn = NULL; + s->encoder = NULL; + s->encoding = NULL; + s->indent = NULL; + s->key_separator = NULL; + s->item_separator = NULL; + s->key_memo = NULL; + s->sort_keys = NULL; + s->item_sort_key = NULL; + s->item_sort_kw = NULL; + s->Decimal = NULL; + } + return (PyObject *)s; +} + +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* initialize Encoder object */ + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", "item_sort_key", "encoding", "for_json", "ignore_nan", "Decimal", NULL}; + + PyEncoderObject *s; + PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; + PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo; + PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array; + PyObject *bigint_as_string, *item_sort_key, *encoding, *for_json; + PyObject *ignore_nan, *Decimal; + + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOO:make_encoder", kwlist, + &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, + &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, + &namedtuple_as_object, &tuple_as_array, &bigint_as_string, + &item_sort_key, &encoding, &for_json, &ignore_nan, &Decimal)) + return -1; + + s->markers = markers; + s->defaultfn = defaultfn; + s->encoder = encoder; + s->encoding = JSON_ParseEncoding(encoding); + if (s->encoding == NULL) + return -1; + s->indent = indent; + s->key_separator = key_separator; + s->item_separator = item_separator; + s->skipkeys_bool = skipkeys; + s->skipkeys = PyObject_IsTrue(skipkeys); + s->key_memo = key_memo; + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); + s->allow_or_ignore_nan = ( + (PyObject_IsTrue(ignore_nan) ? JSON_IGNORE_NAN : 0) | + (PyObject_IsTrue(allow_nan) ? 
JSON_ALLOW_NAN : 0)); + s->use_decimal = PyObject_IsTrue(use_decimal); + s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); + s->tuple_as_array = PyObject_IsTrue(tuple_as_array); + s->bigint_as_string = PyObject_IsTrue(bigint_as_string); + if (item_sort_key != Py_None) { + if (!PyCallable_Check(item_sort_key)) + PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable"); + } + else if (PyObject_IsTrue(sort_keys)) { + static PyObject *itemgetter0 = NULL; + if (!itemgetter0) { + PyObject *operator = PyImport_ImportModule("operator"); + if (!operator) + return -1; + itemgetter0 = PyObject_CallMethod(operator, "itemgetter", "i", 0); + Py_DECREF(operator); + } + item_sort_key = itemgetter0; + if (!item_sort_key) + return -1; + } + if (item_sort_key == Py_None) { + Py_INCREF(Py_None); + s->item_sort_kw = Py_None; + } + else { + s->item_sort_kw = PyDict_New(); + if (s->item_sort_kw == NULL) + return -1; + if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key)) + return -1; + } + s->sort_keys = sort_keys; + s->item_sort_key = item_sort_key; + s->Decimal = Decimal; + s->for_json = PyObject_IsTrue(for_json); + + Py_INCREF(s->markers); + Py_INCREF(s->defaultfn); + Py_INCREF(s->encoder); + Py_INCREF(s->indent); + Py_INCREF(s->key_separator); + Py_INCREF(s->item_separator); + Py_INCREF(s->key_memo); + Py_INCREF(s->skipkeys_bool); + Py_INCREF(s->sort_keys); + Py_INCREF(s->item_sort_key); + Py_INCREF(s->Decimal); + return 0; +} + +static PyObject * +encoder_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to encode_listencode_obj */ + static char *kwlist[] = {"obj", "_current_indent_level", NULL}; + PyObject *obj; + Py_ssize_t indent_level; + PyEncoderObject *s; + JSON_Accu rval; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, + &obj, _convertPyInt_AsSsize_t, &indent_level)) + return NULL; + if (JSON_Accu_Init(&rval)) + return NULL; + if (encoder_listencode_obj(s, &rval, obj, indent_level)) { + JSON_Accu_Destroy(&rval); + return NULL; + } + return JSON_Accu_FinishAsList(&rval); +} + +static PyObject * +_encoded_const(PyObject *obj) +{ + /* Return the JSON string representation of None, True, False */ + if (obj == Py_None) { + static PyObject *s_null = NULL; + if (s_null == NULL) { + s_null = JSON_InternFromString("null"); + } + Py_INCREF(s_null); + return s_null; + } + else if (obj == Py_True) { + static PyObject *s_true = NULL; + if (s_true == NULL) { + s_true = JSON_InternFromString("true"); + } + Py_INCREF(s_true); + return s_true; + } + else if (obj == Py_False) { + static PyObject *s_false = NULL; + if (s_false == NULL) { + s_false = JSON_InternFromString("false"); + } + Py_INCREF(s_false); + return s_false; + } + else { + PyErr_SetString(PyExc_ValueError, "not a const"); + return NULL; + } +} + +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a PyFloat */ + double i = PyFloat_AS_DOUBLE(obj); + if (!Py_IS_FINITE(i)) { + if (!s->allow_or_ignore_nan) { + PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); + return NULL; + } + if (s->allow_or_ignore_nan & JSON_IGNORE_NAN) { + return _encoded_const(Py_None); + } + /* JSON_ALLOW_NAN is set */ + else if (i > 0) { + static PyObject *sInfinity = NULL; + if (sInfinity == NULL) + sInfinity = JSON_InternFromString("Infinity"); + if (sInfinity) + Py_INCREF(sInfinity); + return sInfinity; + } + 
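+        /* The -Infinity and NaN branches below cache their interned
+           constants the same way as Infinity above. */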
else if (i < 0) { + static PyObject *sNegInfinity = NULL; + if (sNegInfinity == NULL) + sNegInfinity = JSON_InternFromString("-Infinity"); + if (sNegInfinity) + Py_INCREF(sNegInfinity); + return sNegInfinity; + } + else { + static PyObject *sNaN = NULL; + if (sNaN == NULL) + sNaN = JSON_InternFromString("NaN"); + if (sNaN) + Py_INCREF(sNaN); + return sNaN; + } + } + /* Use a better float format here? */ + return PyObject_Repr(obj); +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + if (s->fast_encode) + return py_encode_basestring_ascii(NULL, obj); + else + return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); +} + +static int +_steal_accumulate(JSON_Accu *accu, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = JSON_Accu_Accumulate(accu, stolen); + Py_DECREF(stolen); + return rval; +} + +static int +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level) +{ + /* Encode Python object obj to a JSON term, rval is a PyList */ + int rv = -1; + do { + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr != NULL) + rv = _steal_accumulate(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded != NULL) + rv = _steal_accumulate(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded != NULL) { + if (s->bigint_as_string) { + encoded = maybe_quote_bigint(encoded, obj); + if (encoded == NULL) + break; + } + rv = _steal_accumulate(rval, encoded); + } + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded != NULL) + rv = _steal_accumulate(rval, encoded); + } + else if (s->for_json && _has_for_json_hook(obj)) { + PyObject *newobj; + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + newobj = PyObject_CallMethod(obj, "for_json", NULL); + if (newobj != NULL) { + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + } + Py_LeaveRecursiveCall(); + } + else if (s->namedtuple_as_object && _is_namedtuple(obj)) { + PyObject *newobj; + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + newobj = PyObject_CallMethod(obj, "_asdict", NULL); + if (newobj != NULL) { + rv = encoder_listencode_dict(s, rval, newobj, indent_level); + Py_DECREF(newobj); + } + Py_LeaveRecursiveCall(); + } + else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) { + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + rv = encoder_listencode_list(s, rval, obj, indent_level); + Py_LeaveRecursiveCall(); + } + else if (PyDict_Check(obj)) { + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + rv = encoder_listencode_dict(s, rval, obj, indent_level); + Py_LeaveRecursiveCall(); + } + else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded != NULL) + rv = _steal_accumulate(rval, encoded); + } + else { + PyObject *ident = NULL; + PyObject *newobj; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + break; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference 
detected"); + Py_DECREF(ident); + break; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + break; + } + } + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + Py_LeaveRecursiveCall(); + break; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_LeaveRecursiveCall(); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + rv = -1; + } + else if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + rv = -1; + } + Py_XDECREF(ident); + } + } + } while (0); + return rv; +} + +static int +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *iter = NULL; + PyObject *item = NULL; + PyObject *items = NULL; + PyObject *encoded = NULL; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = JSON_InternFromString("{"); + close_dict = JSON_InternFromString("}"); + empty_dict = JSON_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return JSON_Accu_Accumulate(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (JSON_Accu_Accumulate(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (_indent * _current_indent_level) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + + iter = encoder_dict_iteritems(s, dct); + if (iter == NULL) + goto bail; + + idx = 0; + while ((item = PyIter_Next(iter))) { + PyObject *encoded, *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; + + encoded = PyDict_GetItem(s->key_memo, key); + if (encoded != NULL) { + Py_INCREF(encoded); + } else { + kstr = encoder_stringify_key(s, key); + if (kstr == NULL) + goto bail; + else if (kstr == Py_None) { + /* skipkeys */ + Py_DECREF(item); + Py_DECREF(kstr); + continue; + } + } + if (idx) { + if (JSON_Accu_Accumulate(rval, s->item_separator)) + goto bail; + } + if (encoded == NULL) { + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyDict_SetItem(s->key_memo, key, encoded)) + goto bail; + } + if (JSON_Accu_Accumulate(rval, encoded)) { + goto bail; + } + Py_CLEAR(encoded); + if (JSON_Accu_Accumulate(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + Py_CLEAR(item); + idx += 1; + } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if 
(s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (_indent * _current_indent_level) + */ + } + if (JSON_Accu_Accumulate(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(encoded); + Py_XDECREF(items); + Py_XDECREF(iter); + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *iter = NULL; + PyObject *obj = NULL; + int is_true; + int i = 0; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = JSON_InternFromString("["); + close_array = JSON_InternFromString("]"); + empty_array = JSON_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + is_true = PyObject_IsTrue(seq); + if (is_true == -1) + return -1; + else if (is_true == 0) + return JSON_Accu_Accumulate(rval, empty_array); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + iter = PyObject_GetIter(seq); + if (iter == NULL) + goto bail; + + if (JSON_Accu_Accumulate(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (_indent * _current_indent_level) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + while ((obj = PyIter_Next(iter))) { + if (i) { + if (JSON_Accu_Accumulate(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + i++; + Py_CLEAR(obj); + } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (_indent * _current_indent_level) + */ + } + if (JSON_Accu_Accumulate(rval, close_array)) + goto bail; + return 0; + +bail: + Py_XDECREF(obj); + Py_XDECREF(iter); + Py_XDECREF(ident); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ + /* Deallocate Encoder */ + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->encoding); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + Py_VISIT(s->key_memo); + Py_VISIT(s->sort_keys); + Py_VISIT(s->item_sort_kw); + Py_VISIT(s->item_sort_key); + Py_VISIT(s->Decimal); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->encoding); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + 
Py_CLEAR(s->key_memo); + Py_CLEAR(s->skipkeys_bool); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->item_sort_kw); + Py_CLEAR(s->item_sort_key); + Py_CLEAR(s->Decimal); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyVarObject_HEAD_INIT(NULL, 0) + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + encoder_init, /* tp_init */ + 0, /* tp_alloc */ + encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + {"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_speedups", /* m_name */ + module_doc, /* m_doc */ + -1, /* m_size */ + speedups_methods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear*/ + NULL, /* m_free */ +}; +#endif + +static PyObject * +moduleinit(void) +{ + PyObject *m; + PyScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyScannerType) < 0) + return NULL; + PyEncoderType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyEncoderType) < 0) + return NULL; + +#if PY_MAJOR_VERSION >= 3 + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule3("_speedups", speedups_methods, module_doc); +#endif + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); + return m; +} + +#if PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC +PyInit__speedups(void) +{ + return moduleinit(); +} +#else +void +init_speedups(void) +{ + moduleinit(); +} +#endif diff --git a/simplejson/compat.py b/simplejson/compat.py new file mode 100644 index 0000000..449e48a --- /dev/null +++ b/simplejson/compat.py @@ -0,0 +1,43 @@ +"""Python 3 compatibility shims +""" +import sys +if sys.version_info[0] < 3: + PY3 = False + def b(s): + return s + def u(s): + return unicode(s, 'unicode_escape') + import cStringIO as StringIO + StringIO = BytesIO = StringIO.StringIO + text_type = unicode + binary_type = str + string_types = (basestring,) + integer_types = (int, long) + unichr = unichr + reload_module = reload + def fromhex(s): + return s.decode('hex') + +else: + PY3 = True + from imp import reload as reload_module + import codecs + def b(s): + return codecs.latin_1_encode(s)[0] + def u(s): + return s + import io + StringIO = io.StringIO + 
BytesIO = io.BytesIO + text_type = str + binary_type = bytes + string_types = (str,) + integer_types = (int,) + + def unichr(s): + return u(chr(s)) + + def fromhex(s): + return bytes.fromhex(s) + +long_type = integer_types[-1] diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100644 index 0000000..5ccb450 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,389 @@ +"""Implementation of JSONDecoder +""" +from __future__ import absolute_import +import re +import sys +import struct +from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr +from .scanner import make_scanner, JSONDecodeError + +def _import_c_scanstring(): + try: + from ._speedups import scanstring + return scanstring + except ImportError: + return None +c_scanstring = _import_c_scanstring() + +# NOTE (3.1.0): JSONDecodeError may still be imported from this module for +# compatibility, but it was never in the __all__ +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = fromhex('7FF80000000000007FF0000000000000') + # The struct module in Python 2.4 would get frexp() out of range here + # when an endian is specified in the format string. Fixed in Python 2.5+ + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u('"'), '\\': u('\u005c'), '/': u('/'), + 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'), +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join, + _PY3=PY3, _maxunicode=sys.maxunicode): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
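+    For example, py_scanstring('"abc"', 1) decodes the string starting
+    just past the opening quote and returns (u'abc', 5).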
+
+    Returns a tuple of the decoded string and the index of the character in s
+    after the end quote."""
+    if encoding is None:
+        encoding = DEFAULT_ENCODING
+    chunks = []
+    _append = chunks.append
+    begin = end - 1
+    while 1:
+        chunk = _m(s, end)
+        if chunk is None:
+            raise JSONDecodeError(
+                "Unterminated string starting at", s, begin)
+        end = chunk.end()
+        content, terminator = chunk.groups()
+        # Content contains zero or more unescaped string characters
+        if content:
+            if not _PY3 and not isinstance(content, text_type):
+                content = text_type(content, encoding)
+            _append(content)
+        # Terminator is the end of string, a literal control character,
+        # or a backslash denoting that an escape sequence follows
+        if terminator == '"':
+            break
+        elif terminator != '\\':
+            if strict:
+                msg = "Invalid control character %r at"
+                raise JSONDecodeError(msg, s, end)
+            else:
+                _append(terminator)
+                continue
+        try:
+            esc = s[end]
+        except IndexError:
+            raise JSONDecodeError(
+                "Unterminated string starting at", s, begin)
+        # If not a unicode escape sequence, must be in the lookup table
+        if esc != 'u':
+            try:
+                char = _b[esc]
+            except KeyError:
+                msg = "Invalid \\X escape sequence %r"
+                raise JSONDecodeError(msg, s, end)
+            end += 1
+        else:
+            # Unicode escape sequence
+            msg = "Invalid \\uXXXX escape sequence"
+            esc = s[end + 1:end + 5]
+            escX = esc[1:2]
+            if len(esc) != 4 or escX == 'x' or escX == 'X':
+                raise JSONDecodeError(msg, s, end - 1)
+            try:
+                uni = int(esc, 16)
+            except ValueError:
+                raise JSONDecodeError(msg, s, end - 1)
+            end += 5
+            # Check for surrogate pair on UCS-4 systems
+            # Note that this will join high/low surrogate pairs
+            # but will also pass unpaired surrogates through
+            if (_maxunicode > 65535 and
+                uni & 0xfc00 == 0xd800 and
+                s[end:end + 2] == '\\u'):
+                esc2 = s[end + 2:end + 6]
+                escX = esc2[1:2]
+                if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
+                    try:
+                        uni2 = int(esc2, 16)
+                    except ValueError:
+                        raise JSONDecodeError(msg, s, end)
+                    if uni2 & 0xfc00 == 0xdc00:
+                        uni = 0x10000 + (((uni - 0xd800) << 10) |
+                                         (uni2 - 0xdc00))
+                        end += 6
+            char = unichr(uni)
+        # Append the unescaped character
+        _append(char)
+    return _join(chunks), end
+
+
+# Use speedup if available
+scanstring = c_scanstring or py_scanstring
+
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
+WHITESPACE_STR = ' \t\n\r'
+
+def JSONObject(state, encoding, strict, scan_once, object_hook,
+        object_pairs_hook, memo=None,
+        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    (s, end) = state
+    # Backwards compatibility
+    if memo is None:
+        memo = {}
+    memo_get = memo.setdefault
+    pairs = []
+    # Use a slice to prevent IndexError from being raised, the following
+    # check will raise a more specific ValueError if the string is empty
+    nextchar = s[end:end + 1]
+    # Normally we expect nextchar == '"'
+    if nextchar != '"':
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
+        # Trivial empty object
+        if nextchar == '}':
+            if object_pairs_hook is not None:
+                result = object_pairs_hook(pairs)
+                return result, end + 1
+            pairs = {}
+            if object_hook is not None:
+                pairs = object_hook(pairs)
+            return pairs, end + 1
+        elif nextchar != '"':
+            raise JSONDecodeError(
+                "Expecting property name enclosed in double quotes",
+                s, end)
+    end += 1
+    while True:
+        key, end = scanstring(s, end, encoding, strict)
+        key = memo_get(key, key)
+
+        # To skip some function call overhead we optimize the fast paths where
+        # the JSON key separator is ": " or just ":".
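+        # The colon usually follows the key immediately, so test for it
+        # directly before paying for a whitespace-regex match.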
+ if s[end:end + 1] != ':': + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise JSONDecodeError("Expecting ':' delimiter", s, end) + + end += 1 + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + value, end = scan_once(s, end) + pairs.append((key, value)) + + try: + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 + + if nextchar == '}': + break + elif nextchar != ',': + raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1) + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + + end += 1 + if nextchar != '"': + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end - 1) + + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + pairs = dict(pairs) + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end + +def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state + values = [] + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + # Look-ahead for trivial empty array + if nextchar == ']': + return values, end + 1 + elif nextchar == '': + raise JSONDecodeError("Expecting value or ']'", s, end) + _append = values.append + while True: + value, end = scan_once(s, end) + _append(value) + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + elif nextchar != ',': + raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1) + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + return values, end + +class JSONDecoder(object): + """Simple JSON <http://json.org> decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + + """ + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True, + object_pairs_hook=None): + """ + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as :class:`unicode`. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. 
This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + *strict* controls the parser's behavior when it encounters an + invalid control character in a string. The default setting of + ``True`` means that unescaped control characters are parse errors, if + ``False`` then control characters will be allowed in strings. + + """ + if encoding is None: + encoding = DEFAULT_ENCODING + self.encoding = encoding + self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook + self.parse_float = parse_float or float + self.parse_int = parse_int or int + self.parse_constant = parse_constant or _CONSTANTS.__getitem__ + self.strict = strict + self.parse_object = JSONObject + self.parse_array = JSONArray + self.parse_string = scanstring + self.memo = {} + self.scan_once = make_scanner(self) + + def decode(self, s, _w=WHITESPACE.match, _PY3=PY3): + """Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + + """ + if _PY3 and isinstance(s, binary_type): + s = s.decode(self.encoding) + obj, end = self.raw_decode(s) + end = _w(s, end).end() + if end != len(s): + raise JSONDecodeError("Extra data", s, end, len(s)) + return obj + + def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` + beginning with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + Optionally, ``idx`` can be used to specify an offset in ``s`` where + the JSON document begins. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. + + """ + if _PY3 and not isinstance(s, text_type): + raise TypeError("Input string must be text, not bytes") + return self.scan_once(s, idx=_w(s, idx).end()) diff --git a/simplejson/encoder.py b/simplejson/encoder.py new file mode 100644 index 0000000..9815ee5 --- /dev/null +++ b/simplejson/encoder.py @@ -0,0 +1,628 @@ +"""Implementation of JSONEncoder +""" +from __future__ import absolute_import +import re +from operator import itemgetter +from decimal import Decimal +from .compat import u, unichr, binary_type, string_types, integer_types, PY3 +def _import_speedups(): + try: + from . 
import _speedups + return _speedups.encode_basestring_ascii, _speedups.make_encoder + except ImportError: + return None, None +c_encode_basestring_ascii, c_make_encoder = _import_speedups() + +from simplejson.decoder import PosInf + +#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') +# This is required because u() will mangle the string and ur'' isn't valid +# python3 syntax +ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(r'[\x80-\xff]') +ESCAPE_DCT = { + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} +for i in range(0x20): + #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) + ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) +for i in [0x2028, 0x2029]: + ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,)) + +FLOAT_REPR = repr + +def encode_basestring(s, _PY3=PY3, _q=u('"')): + """Return a JSON representation of a Python string + + """ + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') + def replace(match): + return ESCAPE_DCT[match.group(0)] + return _q + ESCAPE.sub(replace, s) + _q + + +def py_encode_basestring_ascii(s, _PY3=PY3): + """Return an ASCII-only JSON representation of a Python string + + """ + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + n = ord(s) + if n < 0x10000: + #return '\\u{0:04x}'.format(n) + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +encode_basestring_ascii = ( + c_encode_basestring_ascii or py_encode_basestring_ascii) + +class JSONEncoder(object): + """Extensible JSON <http://json.org> encoder for Python data structures. + + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict, namedtuple | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str, unicode | string | + +-------------------+---------------+ + | int, long, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default()`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + + """ + item_separator = ', ' + key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None, + use_decimal=True, namedtuple_as_object=True, + tuple_as_array=True, bigint_as_string=False, + item_sort_key=None, for_json=False, ignore_nan=False): + """Constructor for JSONEncoder, with sensible defaults. 
+
+        If skipkeys is false, then it is a TypeError to attempt
+        encoding of keys that are not str, int, long, float or None. If
+        skipkeys is true, such items are simply skipped.
+
+        If ensure_ascii is true, the output is guaranteed to be str
+        objects with all incoming unicode characters escaped. If
+        ensure_ascii is false, the output will be a unicode object.
+
+        If check_circular is true, then lists, dicts, and custom encoded
+        objects will be checked for circular references during encoding to
+        prevent an infinite recursion (which would cause an OverflowError).
+        Otherwise, no such check takes place.
+
+        If allow_nan is true, then NaN, Infinity, and -Infinity will be
+        encoded as such. This behavior is not JSON specification compliant,
+        but is consistent with most JavaScript based encoders and decoders.
+        Otherwise, it will be a ValueError to encode such floats.
+
+        If sort_keys is true, then the output of dictionaries will be
+        sorted by key; this is useful for regression tests to ensure
+        that JSON serializations can be compared on a day-to-day basis.
+
+        If indent is a string, then JSON array elements and object members
+        will be pretty-printed with a newline followed by that string repeated
+        for each level of nesting. ``None`` (the default) selects the most compact
+        representation without any newlines. For backwards compatibility with
+        versions of simplejson earlier than 2.1.0, an integer is also accepted
+        and is converted to a string with that many spaces.
+
+        If specified, separators should be an (item_separator, key_separator)
+        tuple. The default is (', ', ': ') if *indent* is ``None`` and
+        (',', ': ') otherwise. To get the most compact JSON representation,
+        you should specify (',', ':') to eliminate whitespace.
+
+        If specified, default is a function that gets called for objects
+        that can't otherwise be serialized. It should return a JSON encodable
+        version of the object or raise a ``TypeError``.
+
+        If encoding is not None, then all input strings will be
+        transformed into unicode using that encoding prior to JSON-encoding.
+        The default is UTF-8.
+
+        If use_decimal is true (not the default), ``decimal.Decimal`` will
+        be supported directly by the encoder. For the inverse, decode JSON
+        with ``parse_float=decimal.Decimal``.
+
+        If namedtuple_as_object is true (the default), objects with
+        ``_asdict()`` methods will be encoded as JSON objects.
+
+        If tuple_as_array is true (the default), tuple (and subclasses) will
+        be encoded as JSON arrays.
+
+        If bigint_as_string is true (not the default), ints 2**53 and higher
+        or -2**53 and lower will be encoded as strings. This is to avoid the
+        rounding that happens in JavaScript otherwise.
+
+        If specified, item_sort_key is a callable used to sort the items in
+        each dictionary. This is useful if you want to sort items other than
+        in alphabetical order by key.
+
+        If for_json is true (not the default), objects with a ``for_json()``
+        method will use the return value of that method for encoding as JSON
+        instead of the object.
+
+        If *ignore_nan* is true (default: ``False``), then out of range
+        :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized
+        as ``null`` in compliance with the ECMA-262 specification. If true,
+        this will override *allow_nan*.
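+
+        For example::
+
+            >>> JSONEncoder(sort_keys=True, separators=(',', ':')).encode(
+            ...     {'b': 1, 'a': 2})
+            '{"a":2,"b":1}'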
+ + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.use_decimal = use_decimal + self.namedtuple_as_object = namedtuple_as_object + self.tuple_as_array = tuple_as_array + self.bigint_as_string = bigint_as_string + self.item_sort_key = item_sort_key + self.for_json = for_json + self.ignore_nan = ignore_nan + if indent is not None and not isinstance(indent, string_types): + indent = indent * ' ' + self.indent = indent + if separators is not None: + self.item_separator, self.key_separator = separators + elif indent is not None: + self.item_separator = ',' + if default is not None: + self.default = default + self.encoding = encoding + + def default(self, o): + """Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError(repr(o) + " is not JSON serializable") + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> from simplejson import JSONEncoder + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, binary_type): + _encoding = self.encoding + if (_encoding is not None and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if isinstance(o, string_types): + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + if self.ensure_ascii: + return ''.join(chunks) + else: + return u''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, binary_type): + o = o.decode(_encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan, + _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): + # Check for specials. Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on + # the internals. 
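+            # NaN is the only value that compares unequal to itself, so
+            # the 'o != o' test below detects it without math.isnan().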
+
+            if o != o:
+                text = 'NaN'
+            elif o == _inf:
+                text = 'Infinity'
+            elif o == _neginf:
+                text = '-Infinity'
+            else:
+                return _repr(o)
+
+            if ignore_nan:
+                text = 'null'
+            elif not allow_nan:
+                raise ValueError(
+                    "Out of range float values are not JSON compliant: " +
+                    repr(o))
+
+            return text
+
+
+        key_memo = {}
+        if (_one_shot and c_make_encoder is not None
+                and self.indent is None):
+            _iterencode = c_make_encoder(
+                markers, self.default, _encoder, self.indent,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
+                self.namedtuple_as_object, self.tuple_as_array,
+                self.bigint_as_string, self.item_sort_key,
+                self.encoding, self.for_json, self.ignore_nan,
+                Decimal)
+        else:
+            _iterencode = _make_iterencode(
+                markers, self.default, _encoder, self.indent, floatstr,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, _one_shot, self.use_decimal,
+                self.namedtuple_as_object, self.tuple_as_array,
+                self.bigint_as_string, self.item_sort_key,
+                self.encoding, self.for_json,
+                Decimal=Decimal)
+        try:
+            return _iterencode(o, 0)
+        finally:
+            key_memo.clear()
+
+
+class JSONEncoderForHTML(JSONEncoder):
+    """An encoder that produces JSON safe to embed in HTML.
+
+    To embed JSON content in, say, a script tag on a web page, the
+    characters &, < and > should be escaped. They cannot be escaped
+    with the usual entities (e.g. &amp;) because they are not expanded
+    within <script> tags.
+    """
+
+    def encode(self, o):
+        # Override JSONEncoder.encode because it has hacks for
+        # performance that make things more complicated.
+        chunks = self.iterencode(o, True)
+        if self.ensure_ascii:
+            return ''.join(chunks)
+        else:
+            return u''.join(chunks)
+
+    def iterencode(self, o, _one_shot=False):
+        chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
+        for chunk in chunks:
+            chunk = chunk.replace('&', '\\u0026')
+            chunk = chunk.replace('<', '\\u003c')
+            chunk = chunk.replace('>', '\\u003e')
+            yield chunk
+
+
+def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
+        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
+        _use_decimal, _namedtuple_as_object, _tuple_as_array,
+        _bigint_as_string, _item_sort_key, _encoding, _for_json,
+        ## HACK: hand-optimized bytecode; turn globals into locals
+        _PY3=PY3,
+        ValueError=ValueError,
+        string_types=string_types,
+        Decimal=Decimal,
+        dict=dict,
+        float=float,
+        id=id,
+        integer_types=integer_types,
+        isinstance=isinstance,
+        list=list,
+        str=str,
+        tuple=tuple,
+    ):
+    if _item_sort_key and not callable(_item_sort_key):
+        raise TypeError("item_sort_key must be None or callable")
+    elif _sort_keys and not _item_sort_key:
+        _item_sort_key = itemgetter(0)
+
+    def _iterencode_list(lst, _current_indent_level):
+        if not lst:
+            yield '[]'
+            return
+        if markers is not None:
+            markerid = id(lst)
+            if markerid in markers:
+                raise ValueError("Circular reference detected")
+            markers[markerid] = lst
+        buf = '['
+        if _indent is not None:
+            _current_indent_level += 1
+            newline_indent = '\n' + (_indent * _current_indent_level)
+            separator = _item_separator + newline_indent
+            buf += newline_indent
+        else:
+            newline_indent = None
+            separator = _item_separator
+        first = True
+        for value in lst:
+            if first:
+                first = False
+            else:
+                buf = separator
+            if (isinstance(value, string_types) or
+                (_PY3 and isinstance(value, binary_type))):
+                yield buf + _encoder(value)
+            elif value is None:
+                yield buf + 'null'
+            elif value is True:
+                yield buf + 'true'
+            elif value is
False: + yield buf + 'false' + elif isinstance(value, integer_types): + yield ((buf + str(value)) + if (not _bigint_as_string or + (-1 << 53) < value < (1 << 53)) + else (buf + '"' + str(value) + '"')) + elif isinstance(value, float): + yield buf + _floatstr(value) + elif _use_decimal and isinstance(value, Decimal): + yield buf + str(value) + else: + yield buf + for_json = _for_json and getattr(value, 'for_json', None) + if for_json and callable(for_json): + chunks = _iterencode(for_json(), _current_indent_level) + elif isinstance(value, list): + chunks = _iterencode_list(value, _current_indent_level) + else: + _asdict = _namedtuple_as_object and getattr(value, '_asdict', None) + if _asdict and callable(_asdict): + chunks = _iterencode_dict(_asdict(), + _current_indent_level) + elif _tuple_as_array and isinstance(value, tuple): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (_indent * _current_indent_level) + yield ']' + if markers is not None: + del markers[markerid] + + def _stringify_key(key): + if isinstance(key, string_types): # pragma: no cover + pass + elif isinstance(key, binary_type): + key = key.decode(_encoding) + elif isinstance(key, float): + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, integer_types): + key = str(key) + elif _use_decimal and isinstance(key, Decimal): + key = str(key) + elif _skipkeys: + key = None + else: + raise TypeError("key " + repr(key) + " is not a string") + return key + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (_indent * _current_indent_level) + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _PY3: + iteritems = dct.items() + else: + iteritems = dct.iteritems() + if _item_sort_key: + items = [] + for k, v in dct.items(): + if not isinstance(k, string_types): + k = _stringify_key(k) + if k is None: + continue + items.append((k, v)) + items.sort(key=_item_sort_key) + else: + items = iteritems + for key, value in items: + if not (_item_sort_key or isinstance(key, string_types)): + key = _stringify_key(key) + if key is None: + # _skipkeys must be True + continue + if first: + first = False + else: + yield item_separator + yield _encoder(key) + yield _key_separator + if (isinstance(value, string_types) or + (_PY3 and isinstance(value, binary_type))): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, integer_types): + yield (str(value) + if (not _bigint_as_string or + (-1 << 53) < value < (1 << 53)) + else ('"' + str(value) + '"')) + elif isinstance(value, float): + yield _floatstr(value) + elif _use_decimal and isinstance(value, Decimal): + yield str(value) + else: + for_json = _for_json and getattr(value, 'for_json', None) + if for_json and callable(for_json): + chunks = 
_iterencode(for_json(), _current_indent_level) + elif isinstance(value, list): + chunks = _iterencode_list(value, _current_indent_level) + else: + _asdict = _namedtuple_as_object and getattr(value, '_asdict', None) + if _asdict and callable(_asdict): + chunks = _iterencode_dict(_asdict(), + _current_indent_level) + elif _tuple_as_array and isinstance(value, tuple): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (_indent * _current_indent_level) + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(o, _current_indent_level): + if (isinstance(o, string_types) or + (_PY3 and isinstance(o, binary_type))): + yield _encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, integer_types): + yield (str(o) + if (not _bigint_as_string or + (-1 << 53) < o < (1 << 53)) + else ('"' + str(o) + '"')) + elif isinstance(o, float): + yield _floatstr(o) + else: + for_json = _for_json and getattr(o, 'for_json', None) + if for_json and callable(for_json): + for chunk in _iterencode(for_json(), _current_indent_level): + yield chunk + elif isinstance(o, list): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + else: + _asdict = _namedtuple_as_object and getattr(o, '_asdict', None) + if _asdict and callable(_asdict): + for chunk in _iterencode_dict(_asdict(), + _current_indent_level): + yield chunk + elif (_tuple_as_array and isinstance(o, tuple)): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + elif isinstance(o, dict): + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk + elif _use_decimal and isinstance(o, Decimal): + yield str(o) + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + for chunk in _iterencode(o, _current_indent_level): + yield chunk + if markers is not None: + del markers[markerid] + + return _iterencode diff --git a/simplejson/ordered_dict.py b/simplejson/ordered_dict.py new file mode 100644 index 0000000..87ad888 --- /dev/null +++ b/simplejson/ordered_dict.py @@ -0,0 +1,119 @@ +"""Drop-in replacement for collections.OrderedDict by Raymond Hettinger + +http://code.activestate.com/recipes/576693/ + +""" +from UserDict import DictMixin + +# Modified from original to support Python 2.4, see +# http://code.google.com/p/simplejson/issues/detail?id=53 +try: + all +except NameError: + def all(seq): + for elem in seq: + if not elem: + return False + return True + +class OrderedDict(dict, DictMixin): + + def __init__(self, *args, **kwds): + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__end + except AttributeError: + self.clear() + self.update(*args, **kwds) + + def clear(self): + self.__end = end = [] + end += [None, end, end] # sentinel node for doubly linked list + self.__map = {} # key --> [key, prev, next] + dict.clear(self) + + def __setitem__(self, key, value): + if key not in self: + end = self.__end + curr = end[1] + curr[2] = end[1] = self.__map[key] = [key, curr, end] + dict.__setitem__(self, key, value) + + def __delitem__(self, key): + dict.__delitem__(self, key) + key, 
prev, next = self.__map.pop(key) + prev[2] = next + next[1] = prev + + def __iter__(self): + end = self.__end + curr = end[2] + while curr is not end: + yield curr[0] + curr = curr[2] + + def __reversed__(self): + end = self.__end + curr = end[1] + while curr is not end: + yield curr[0] + curr = curr[1] + + def popitem(self, last=True): + if not self: + raise KeyError('dictionary is empty') + # Modified from original to support Python 2.4, see + # http://code.google.com/p/simplejson/issues/detail?id=53 + if last: + key = reversed(self).next() + else: + key = iter(self).next() + value = self.pop(key) + return key, value + + def __reduce__(self): + items = [[k, self[k]] for k in self] + tmp = self.__map, self.__end + del self.__map, self.__end + inst_dict = vars(self).copy() + self.__map, self.__end = tmp + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def keys(self): + return list(self) + + setdefault = DictMixin.setdefault + update = DictMixin.update + pop = DictMixin.pop + values = DictMixin.values + items = DictMixin.items + iterkeys = DictMixin.iterkeys + itervalues = DictMixin.itervalues + iteritems = DictMixin.iteritems + + def __repr__(self): + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + + def copy(self): + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + if isinstance(other, OrderedDict): + return len(self)==len(other) and \ + all(p==q for p, q in zip(self.items(), other.items())) + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other diff --git a/simplejson/scanner.py b/simplejson/scanner.py new file mode 100644 index 0000000..6a0099f --- /dev/null +++ b/simplejson/scanner.py @@ -0,0 +1,125 @@ +"""JSON token scanner +""" +import re +def _import_c_make_scanner(): + try: + from simplejson._speedups import make_scanner + return make_scanner + except ImportError: + return None +c_make_scanner = _import_c_make_scanner() + +__all__ = ['make_scanner', 'JSONDecodeError'] + +NUMBER_RE = re.compile( + r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', + (re.VERBOSE | re.MULTILINE | re.DOTALL)) + +class JSONDecodeError(ValueError): + """Subclass of ValueError with the following additional properties: + + msg: The unformatted error message + doc: The JSON document being parsed + pos: The start index of doc where parsing failed + end: The end index of doc where parsing failed (may be None) + lineno: The line corresponding to pos + colno: The column corresponding to pos + endlineno: The line corresponding to end (may be None) + endcolno: The column corresponding to end (may be None) + + """ + # Note that this exception is used from _speedups + def __init__(self, msg, doc, pos, end=None): + ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) + self.msg = msg + self.doc = doc + self.pos = pos + self.end = end + self.lineno, self.colno = linecol(doc, pos) + if end is not None: + self.endlineno, self.endcolno = linecol(doc, end) + else: + self.endlineno, self.endcolno = None, None + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + 1 + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + lineno, colno = linecol(doc, pos) + msg = msg.replace('%r', repr(doc[pos:pos + 1])) + if end is None: + fmt = 
'%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +def py_make_scanner(context): + parse_object = context.parse_object + parse_array = context.parse_array + parse_string = context.parse_string + match_number = NUMBER_RE.match + encoding = context.encoding + strict = context.strict + parse_float = context.parse_float + parse_int = context.parse_int + parse_constant = context.parse_constant + object_hook = context.object_hook + object_pairs_hook = context.object_pairs_hook + memo = context.memo + + def _scan_once(string, idx): + errmsg = 'Expecting value' + try: + nextchar = string[idx] + except IndexError: + raise JSONDecodeError(errmsg, string, idx) + + if nextchar == '"': + return parse_string(string, idx + 1, encoding, strict) + elif nextchar == '{': + return parse_object((string, idx + 1), encoding, strict, + _scan_once, object_hook, object_pairs_hook, memo) + elif nextchar == '[': + return parse_array((string, idx + 1), _scan_once) + elif nextchar == 'n' and string[idx:idx + 4] == 'null': + return None, idx + 4 + elif nextchar == 't' and string[idx:idx + 4] == 'true': + return True, idx + 4 + elif nextchar == 'f' and string[idx:idx + 5] == 'false': + return False, idx + 5 + + m = match_number(string, idx) + if m is not None: + integer, frac, exp = m.groups() + if frac or exp: + res = parse_float(integer + (frac or '') + (exp or '')) + else: + res = parse_int(integer) + return res, m.end() + elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': + return parse_constant('NaN'), idx + 3 + elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': + return parse_constant('Infinity'), idx + 8 + elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': + return parse_constant('-Infinity'), idx + 9 + else: + raise JSONDecodeError(errmsg, string, idx) + + def scan_once(string, idx): + try: + return _scan_once(string, idx) + finally: + memo.clear() + + return scan_once + +make_scanner = c_make_scanner or py_make_scanner diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py new file mode 100644 index 0000000..c01dfcb --- /dev/null +++ b/simplejson/tests/__init__.py @@ -0,0 +1,79 @@ +from __future__ import absolute_import +import unittest +import doctest +import sys + +class OptionalExtensionTestSuite(unittest.TestSuite): + def run(self, result): + import simplejson + run = unittest.TestSuite.run + run(self, result) + if simplejson._import_c_make_encoder() is None: + TestMissingSpeedups().run(result) + else: + simplejson._toggle_speedups(False) + run(self, result) + simplejson._toggle_speedups(True) + return result + +class TestMissingSpeedups(unittest.TestCase): + def runTest(self): + if hasattr(sys, 'pypy_translation_info'): + "PyPy doesn't need speedups! 
:)" + elif hasattr(self, 'skipTest'): + self.skipTest('_speedups.so is missing!') + +def additional_tests(suite=None): + import simplejson + import simplejson.encoder + import simplejson.decoder + if suite is None: + suite = unittest.TestSuite() + for mod in (simplejson, simplejson.encoder, simplejson.decoder): + suite.addTest(doctest.DocTestSuite(mod)) + suite.addTest(doctest.DocFileSuite('../../index.rst')) + return suite + + +def all_tests_suite(): + suite = unittest.TestLoader().loadTestsFromNames([ + 'simplejson.tests.test_bigint_as_string', + 'simplejson.tests.test_check_circular', + 'simplejson.tests.test_decode', + 'simplejson.tests.test_default', + 'simplejson.tests.test_dump', + 'simplejson.tests.test_encode_basestring_ascii', + 'simplejson.tests.test_encode_for_html', + 'simplejson.tests.test_errors', + 'simplejson.tests.test_fail', + 'simplejson.tests.test_float', + 'simplejson.tests.test_indent', + 'simplejson.tests.test_pass1', + 'simplejson.tests.test_pass2', + 'simplejson.tests.test_pass3', + 'simplejson.tests.test_recursion', + 'simplejson.tests.test_scanstring', + 'simplejson.tests.test_separators', + 'simplejson.tests.test_speedups', + 'simplejson.tests.test_unicode', + 'simplejson.tests.test_decimal', + 'simplejson.tests.test_tuple', + 'simplejson.tests.test_namedtuple', + 'simplejson.tests.test_tool', + 'simplejson.tests.test_for_json', + ]) + suite = additional_tests(suite) + return OptionalExtensionTestSuite([suite]) + + +def main(): + runner = unittest.TextTestRunner(verbosity=1 + sys.argv.count('-v')) + suite = all_tests_suite() + raise SystemExit(not runner.run(suite).wasSuccessful()) + + +if __name__ == '__main__': + import os + import sys + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + main() diff --git a/simplejson/tests/test_bigint_as_string.py b/simplejson/tests/test_bigint_as_string.py new file mode 100644 index 0000000..20ea64c --- /dev/null +++ b/simplejson/tests/test_bigint_as_string.py @@ -0,0 +1,58 @@ +from unittest import TestCase + +import simplejson as json +from simplejson.compat import long_type + +class TestBigintAsString(TestCase): + # Python 2.5, at least the one that ships on Mac OS X, calculates + # 2 ** 53 as 0! It manages to calculate 1 << 53 correctly. 
+ values = [(200, 200), + ((1 << 53) - 1, 9007199254740991), + ((1 << 53), '9007199254740992'), + ((1 << 53) + 1, '9007199254740993'), + (-100, -100), + ((-1 << 53), '-9007199254740992'), + ((-1 << 53) - 1, '-9007199254740993'), + ((-1 << 53) + 1, -9007199254740991)] + + def test_ints(self): + for val, expect in self.values: + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, bigint_as_string=True))) + + def test_lists(self): + for val, expect in self.values: + val = [val, val] + expect = [expect, expect] + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, bigint_as_string=True))) + + def test_dicts(self): + for val, expect in self.values: + val = {'k': val} + expect = {'k': expect} + self.assertEqual( + val, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, bigint_as_string=True))) + + def test_dict_keys(self): + for val, _ in self.values: + expect = {str(val): 'value'} + val = {val: 'value'} + self.assertEqual( + expect, + json.loads(json.dumps(val))) + self.assertEqual( + expect, + json.loads(json.dumps(val, bigint_as_string=True))) diff --git a/simplejson/tests/test_check_circular.py b/simplejson/tests/test_check_circular.py new file mode 100644 index 0000000..af6463d --- /dev/null +++ b/simplejson/tests/test_check_circular.py @@ -0,0 +1,30 @@ +from unittest import TestCase +import simplejson as json + +def default_iterable(obj): + return list(obj) + +class TestCheckCircular(TestCase): + def test_circular_dict(self): + dct = {} + dct['a'] = dct + self.assertRaises(ValueError, json.dumps, dct) + + def test_circular_list(self): + lst = [] + lst.append(lst) + self.assertRaises(ValueError, json.dumps, lst) + + def test_circular_composite(self): + dct2 = {} + dct2['a'] = [] + dct2['a'].append(dct2) + self.assertRaises(ValueError, json.dumps, dct2) + + def test_circular_default(self): + json.dumps([set()], default=default_iterable) + self.assertRaises(TypeError, json.dumps, [set()]) + + def test_circular_off_default(self): + json.dumps([set()], default=default_iterable, check_circular=False) + self.assertRaises(TypeError, json.dumps, [set()], check_circular=False) diff --git a/simplejson/tests/test_decimal.py b/simplejson/tests/test_decimal.py new file mode 100644 index 0000000..2b0940b --- /dev/null +++ b/simplejson/tests/test_decimal.py @@ -0,0 +1,71 @@ +import decimal +from decimal import Decimal +from unittest import TestCase +from simplejson.compat import StringIO, reload_module + +import simplejson as json + +class TestDecimal(TestCase): + NUMS = "1.0", "10.00", "1.1", "1234567890.1234567890", "500" + def dumps(self, obj, **kw): + sio = StringIO() + json.dump(obj, sio, **kw) + res = json.dumps(obj, **kw) + self.assertEqual(res, sio.getvalue()) + return res + + def loads(self, s, **kw): + sio = StringIO(s) + res = json.loads(s, **kw) + self.assertEqual(res, json.load(sio, **kw)) + return res + + def test_decimal_encode(self): + for d in map(Decimal, self.NUMS): + self.assertEqual(self.dumps(d, use_decimal=True), str(d)) + + def test_decimal_decode(self): + for s in self.NUMS: + self.assertEqual(self.loads(s, parse_float=Decimal), Decimal(s)) + + def test_stringify_key(self): + for d in map(Decimal, self.NUMS): + v = {d: d} + self.assertEqual( + self.loads( + self.dumps(v, use_decimal=True), parse_float=Decimal), + {str(d): d}) + + def test_decimal_roundtrip(self): + for d in map(Decimal, self.NUMS): + # The type might 
not be the same (int and Decimal) but they + # should still compare equal. + for v in [d, [d], {'': d}]: + self.assertEqual( + self.loads( + self.dumps(v, use_decimal=True), parse_float=Decimal), + v) + + def test_decimal_defaults(self): + d = Decimal('1.1') + # use_decimal=True is the default + self.assertRaises(TypeError, json.dumps, d, use_decimal=False) + self.assertEqual('1.1', json.dumps(d)) + self.assertEqual('1.1', json.dumps(d, use_decimal=True)) + self.assertRaises(TypeError, json.dump, d, StringIO(), + use_decimal=False) + sio = StringIO() + json.dump(d, sio) + self.assertEqual('1.1', sio.getvalue()) + sio = StringIO() + json.dump(d, sio, use_decimal=True) + self.assertEqual('1.1', sio.getvalue()) + + def test_decimal_reload(self): + # Simulate a subinterpreter that reloads the Python modules but not + # the C code https://github.com/simplejson/simplejson/issues/34 + global Decimal + Decimal = reload_module(decimal).Decimal + import simplejson.encoder + simplejson.encoder.Decimal = Decimal + self.test_decimal_roundtrip() diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py new file mode 100644 index 0000000..ea5c90a --- /dev/null +++ b/simplejson/tests/test_decode.py @@ -0,0 +1,88 @@ +from __future__ import absolute_import +import decimal +from unittest import TestCase + +import simplejson as json +from simplejson.compat import StringIO +from simplejson import OrderedDict + +class TestDecode(TestCase): + if not hasattr(TestCase, 'assertIs'): + def assertIs(self, a, b): + self.assertTrue(a is b, '%r is %r' % (a, b)) + + def test_decimal(self): + rval = json.loads('1.1', parse_float=decimal.Decimal) + self.assertTrue(isinstance(rval, decimal.Decimal)) + self.assertEqual(rval, decimal.Decimal('1.1')) + + def test_float(self): + rval = json.loads('1', parse_int=float) + self.assertTrue(isinstance(rval, float)) + self.assertEqual(rval, 1.0) + + def test_decoder_optimizations(self): + # Several optimizations were made that skip over calls to + # the whitespace regex, so this test is designed to try and + # exercise the uncommon cases. The array cases are already covered. 
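+        # (The literal below mixes spaced and unspaced separators so
+        # that whitespace turns up in positions the optimized paths
+        # would otherwise skip straight past.)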
+ rval = json.loads('{ "key" : "value" , "k":"v" }') + self.assertEqual(rval, {"key":"value", "k":"v"}) + + def test_empty_objects(self): + s = '{}' + self.assertEqual(json.loads(s), eval(s)) + s = '[]' + self.assertEqual(json.loads(s), eval(s)) + s = '""' + self.assertEqual(json.loads(s), eval(s)) + + def test_object_pairs_hook(self): + s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}' + p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4), + ("qrt", 5), ("pad", 6), ("hoy", 7)] + self.assertEqual(json.loads(s), eval(s)) + self.assertEqual(json.loads(s, object_pairs_hook=lambda x: x), p) + self.assertEqual(json.load(StringIO(s), + object_pairs_hook=lambda x: x), p) + od = json.loads(s, object_pairs_hook=OrderedDict) + self.assertEqual(od, OrderedDict(p)) + self.assertEqual(type(od), OrderedDict) + # the object_pairs_hook takes priority over the object_hook + self.assertEqual(json.loads(s, + object_pairs_hook=OrderedDict, + object_hook=lambda x: None), + OrderedDict(p)) + + def check_keys_reuse(self, source, loads): + rval = loads(source) + (a, b), (c, d) = sorted(rval[0]), sorted(rval[1]) + self.assertIs(a, c) + self.assertIs(b, d) + + def test_keys_reuse_str(self): + s = u'[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'.encode('utf8') + self.check_keys_reuse(s, json.loads) + + def test_keys_reuse_unicode(self): + s = u'[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]' + self.check_keys_reuse(s, json.loads) + + def test_empty_strings(self): + self.assertEqual(json.loads('""'), "") + self.assertEqual(json.loads(u'""'), u"") + self.assertEqual(json.loads('[""]'), [""]) + self.assertEqual(json.loads(u'[""]'), [u""]) + + def test_raw_decode(self): + cls = json.decoder.JSONDecoder + self.assertEqual( + ({'a': {}}, 9), + cls().raw_decode("{\"a\": {}}")) + # http://code.google.com/p/simplejson/issues/detail?id=85 + self.assertEqual( + ({'a': {}}, 9), + cls(object_pairs_hook=dict).raw_decode("{\"a\": {}}")) + # https://github.com/simplejson/simplejson/pull/38 + self.assertEqual( + ({'a': {}}, 11), + cls().raw_decode(" \n{\"a\": {}}")) diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py new file mode 100644 index 0000000..d1eacb8 --- /dev/null +++ b/simplejson/tests/test_default.py @@ -0,0 +1,9 @@ +from unittest import TestCase + +import simplejson as json + +class TestDefault(TestCase): + def test_default(self): + self.assertEqual( + json.dumps(type, default=repr), + json.dumps(repr(type))) diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py new file mode 100644 index 0000000..1d118d9 --- /dev/null +++ b/simplejson/tests/test_dump.py @@ -0,0 +1,121 @@ +from unittest import TestCase +from simplejson.compat import StringIO, long_type, b, binary_type, PY3 +import simplejson as json + +def as_text_type(s): + if PY3 and isinstance(s, binary_type): + return s.decode('ascii') + return s + +class TestDump(TestCase): + def test_dump(self): + sio = StringIO() + json.dump({}, sio) + self.assertEqual(sio.getvalue(), '{}') + + def test_constants(self): + for c in [None, True, False]: + self.assertTrue(json.loads(json.dumps(c)) is c) + self.assertTrue(json.loads(json.dumps([c]))[0] is c) + self.assertTrue(json.loads(json.dumps({'a': c}))['a'] is c) + + def test_stringify_key(self): + items = [(b('bytes'), 'bytes'), + (1.0, '1.0'), + (10, '10'), + (True, 'true'), + (False, 'false'), + (None, 'null'), + (long_type(100), '100')] + for k, expect in items: + self.assertEqual( + json.loads(json.dumps({k: expect})), + {expect: 
expect}) + self.assertEqual( + json.loads(json.dumps({k: expect}, sort_keys=True)), + {expect: expect}) + self.assertRaises(TypeError, json.dumps, {json: 1}) + for v in [{}, {'other': 1}, {b('derp'): 1, 'herp': 2}]: + for sort_keys in [False, True]: + v0 = dict(v) + v0[json] = 1 + v1 = dict((as_text_type(key), val) for (key, val) in v.items()) + self.assertEqual( + json.loads(json.dumps(v0, skipkeys=True, sort_keys=sort_keys)), + v1) + self.assertEqual( + json.loads(json.dumps({'': v0}, skipkeys=True, sort_keys=sort_keys)), + {'': v1}) + self.assertEqual( + json.loads(json.dumps([v0], skipkeys=True, sort_keys=sort_keys)), + [v1]) + + def test_dumps(self): + self.assertEqual(json.dumps({}), '{}') + + def test_encode_truefalse(self): + self.assertEqual(json.dumps( + {True: False, False: True}, sort_keys=True), + '{"false": true, "true": false}') + self.assertEqual( + json.dumps( + {2: 3.0, + 4.0: long_type(5), + False: 1, + long_type(6): True, + "7": 0}, + sort_keys=True), + '{"2": 3.0, "4.0": 5, "6": true, "7": 0, "false": 1}') + + def test_ordered_dict(self): + # http://bugs.python.org/issue6105 + items = [('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)] + s = json.dumps(json.OrderedDict(items)) + self.assertEqual( + s, + '{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}') + + def test_indent_unknown_type_acceptance(self): + """ + A test against the regression mentioned at `github issue 29`_. + + The indent parameter should accept any type which pretends to be + an instance of int or long when it comes to being multiplied by + strings, even if it is not actually an int or long, for + backwards compatibility. + + .. _github issue 29: + http://github.com/simplejson/simplejson/issue/29 + """ + + class AwesomeInt(object): + """An awesome reimplementation of integers""" + + def __init__(self, *args, **kwargs): + if len(args) > 0: + # [construct from literals, objects, etc.] + # ... + + # Finally, if args[0] is an integer, store it + if isinstance(args[0], int): + self._int = args[0] + + # [various methods] + + def __mul__(self, other): + # [various ways to multiply AwesomeInt objects] + # ... 
finally, if the right-hand operand is not awesome enough, + # try to do a normal integer multiplication + if hasattr(self, '_int'): + return self._int * other + else: + raise NotImplementedError("To do non-awesome things with" + " this object, please construct it from an integer!") + + s = json.dumps([0, 1, 2], indent=AwesomeInt(3)) + self.assertEqual(s, '[\n 0,\n 1,\n 2\n]') + + def test_accumulator(self): + # the C API uses an accumulator that collects after 100,000 appends + lst = [0] * 100000 + self.assertEqual(json.loads(json.dumps(lst)), lst) diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py new file mode 100644 index 0000000..49706bf --- /dev/null +++ b/simplejson/tests/test_encode_basestring_ascii.py @@ -0,0 +1,47 @@ +from unittest import TestCase + +import simplejson.encoder +from simplejson.compat import b + +CASES = [ + (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), + (u'controls', '"controls"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), + (u' s p a c e d ', '" s p a c e d "'), + (u'\U0001d120', '"\\ud834\\udd20"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u"`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), +] + +class TestEncodeBaseStringAscii(TestCase): + def test_py_encode_basestring_ascii(self): + self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii) + + def test_c_encode_basestring_ascii(self): + if not simplejson.encoder.c_encode_basestring_ascii: + return + self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii) + + def _test_encode_basestring_ascii(self, encode_basestring_ascii): + fname = encode_basestring_ascii.__name__ + for input_string, expect in CASES: + result = encode_basestring_ascii(input_string) + #self.assertEqual(result, expect, + # '{0!r} != {1!r} for {2}({3!r})'.format( + # result, expect, fname, input_string)) + self.assertEqual(result, expect, + '%r != %r for %s(%r)' % (result, expect, fname, input_string)) + + def test_sorted_dict(self): + items = [('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)] + s = simplejson.dumps(dict(items), sort_keys=True) + self.assertEqual(s, '{"five": 5, "four": 4, "one": 1, "three": 3, "two": 2}') diff --git a/simplejson/tests/test_encode_for_html.py b/simplejson/tests/test_encode_for_html.py new file mode 100644 index 0000000..f995254 --- /dev/null +++ b/simplejson/tests/test_encode_for_html.py @@ -0,0 +1,30 @@ +import unittest + +import simplejson as json + +class TestEncodeForHTML(unittest.TestCase): + + def setUp(self): + self.decoder = json.JSONDecoder() + self.encoder = json.JSONEncoderForHTML() + + def test_basic_encode(self): + self.assertEqual(r'"\u0026"', self.encoder.encode('&')) + self.assertEqual(r'"\u003c"', self.encoder.encode('<')) + 
self.assertEqual(r'"\u003e"', self.encoder.encode('>')) + + def test_basic_roundtrip(self): + for char in '&<>': + self.assertEqual( + char, self.decoder.decode( + self.encoder.encode(char))) + + def test_prevent_script_breakout(self): + bad_string = '</script><script>alert("gotcha")</script>' + self.assertEqual( + r'"\u003c/script\u003e\u003cscript\u003e' + r'alert(\"gotcha\")\u003c/script\u003e"', + self.encoder.encode(bad_string)) + self.assertEqual( + bad_string, self.decoder.decode( + self.encoder.encode(bad_string))) diff --git a/simplejson/tests/test_errors.py b/simplejson/tests/test_errors.py new file mode 100644 index 0000000..6bc2fc8 --- /dev/null +++ b/simplejson/tests/test_errors.py @@ -0,0 +1,35 @@ +import sys +from unittest import TestCase + +import simplejson as json +from simplejson.compat import u, b + +class TestErrors(TestCase): + def test_string_keys_error(self): + data = [{'a': 'A', 'b': (2, 4), 'c': 3.0, ('d',): 'D tuple'}] + self.assertRaises(TypeError, json.dumps, data) + + def test_decode_error(self): + err = None + try: + json.loads('{}\na\nb') + except json.JSONDecodeError: + err = sys.exc_info()[1] + else: + self.fail('Expected JSONDecodeError') + self.assertEqual(err.lineno, 2) + self.assertEqual(err.colno, 1) + self.assertEqual(err.endlineno, 3) + self.assertEqual(err.endcolno, 2) + + def test_scan_error(self): + err = None + for t in (u, b): + try: + json.loads(t('{"asdf": "')) + except json.JSONDecodeError: + err = sys.exc_info()[1] + else: + self.fail('Expected JSONDecodeError') + self.assertEqual(err.lineno, 1) + self.assertEqual(err.colno, 10) diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py new file mode 100644 index 0000000..788f3a5 --- /dev/null +++ b/simplejson/tests/test_fail.py @@ -0,0 +1,176 @@ +import sys +from unittest import TestCase + +import simplejson as json + +# 2007-10-05 +JSONDOCS = [ + # http://json.org/JSON_checker/test/fail1.json + '"A JSON payload should be an object or array, not a string."', + # http://json.org/JSON_checker/test/fail2.json + '["Unclosed array"', + # http://json.org/JSON_checker/test/fail3.json + '{unquoted_key: "keys must be quoted"}', + # http://json.org/JSON_checker/test/fail4.json + '["extra comma",]', + # http://json.org/JSON_checker/test/fail5.json + '["double extra comma",,]', + # http://json.org/JSON_checker/test/fail6.json + '[ , "<-- missing value"]', + # http://json.org/JSON_checker/test/fail7.json + '["Comma after the close"],', + # http://json.org/JSON_checker/test/fail8.json + '["Extra close"]]', + # http://json.org/JSON_checker/test/fail9.json + '{"Extra comma": true,}', + # http://json.org/JSON_checker/test/fail10.json + '{"Extra value after close": true} "misplaced quoted value"', + # http://json.org/JSON_checker/test/fail11.json + '{"Illegal expression": 1 + 2}', + # http://json.org/JSON_checker/test/fail12.json + '{"Illegal invocation": alert()}', + # http://json.org/JSON_checker/test/fail13.json + '{"Numbers cannot have leading zeroes": 013}', + # http://json.org/JSON_checker/test/fail14.json + '{"Numbers cannot be hex": 0x14}', + # http://json.org/JSON_checker/test/fail15.json + '["Illegal backslash escape: \\x15"]', + # http://json.org/JSON_checker/test/fail16.json + '[\\naked]', + # http://json.org/JSON_checker/test/fail17.json + '["Illegal backslash escape: \\017"]', + # http://json.org/JSON_checker/test/fail18.json + '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', + # http://json.org/JSON_checker/test/fail19.json + '{"Missing colon" null}', + # 
http://json.org/JSON_checker/test/fail20.json + '{"Double colon":: null}', + # http://json.org/JSON_checker/test/fail21.json + '{"Comma instead of colon", null}', + # http://json.org/JSON_checker/test/fail22.json + '["Colon instead of comma": false]', + # http://json.org/JSON_checker/test/fail23.json + '["Bad value", truth]', + # http://json.org/JSON_checker/test/fail24.json + "['single quote']", + # http://json.org/JSON_checker/test/fail25.json + '["\ttab\tcharacter\tin\tstring\t"]', + # http://json.org/JSON_checker/test/fail26.json + '["tab\\ character\\ in\\ string\\ "]', + # http://json.org/JSON_checker/test/fail27.json + '["line\nbreak"]', + # http://json.org/JSON_checker/test/fail28.json + '["line\\\nbreak"]', + # http://json.org/JSON_checker/test/fail29.json + '[0e]', + # http://json.org/JSON_checker/test/fail30.json + '[0e+]', + # http://json.org/JSON_checker/test/fail31.json + '[0e+-1]', + # http://json.org/JSON_checker/test/fail32.json + '{"Comma instead if closing brace": true,', + # http://json.org/JSON_checker/test/fail33.json + '["mismatch"}', + # http://code.google.com/p/simplejson/issues/detail?id=3 + u'["A\u001FZ control characters in string"]', + # misc based on coverage + '{', + '{]', + '{"foo": "bar"]', + '{"foo": "bar"', + 'nul', + 'nulx', + '-', + '-x', + '-e', + '-e0', + '-Infinite', + '-Inf', + 'Infinit', + 'Infinite', + 'NaM', + 'NuN', + 'falsy', + 'fal', + 'trug', + 'tru', + '1e', + '1ex', + '1e-', + '1e-x', +] + +SKIPS = { + 1: "why not have a string payload?", + 18: "spec doesn't specify any nesting limitations", +} + +class TestFail(TestCase): + def test_failures(self): + for idx, doc in enumerate(JSONDOCS): + idx = idx + 1 + if idx in SKIPS: + json.loads(doc) + continue + try: + json.loads(doc) + except json.JSONDecodeError: + pass + else: + self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) + + def test_array_decoder_issue46(self): + # http://code.google.com/p/simplejson/issues/detail?id=46 + for doc in [u'[,]', '[,]']: + try: + json.loads(doc) + except json.JSONDecodeError: + e = sys.exc_info()[1] + self.assertEqual(e.pos, 1) + self.assertEqual(e.lineno, 1) + self.assertEqual(e.colno, 2) + except Exception: + e = sys.exc_info()[1] + self.fail("Unexpected exception raised %r %s" % (e, e)) + else: + self.fail("Unexpected success parsing '[,]'") + + def test_truncated_input(self): + test_cases = [ + ('', 'Expecting value', 0), + ('[', "Expecting value or ']'", 1), + ('[42', "Expecting ',' delimiter", 3), + ('[42,', 'Expecting value', 4), + ('["', 'Unterminated string starting at', 1), + ('["spam', 'Unterminated string starting at', 1), + ('["spam"', "Expecting ',' delimiter", 7), + ('["spam",', 'Expecting value', 8), + ('{', 'Expecting property name enclosed in double quotes', 1), + ('{"', 'Unterminated string starting at', 1), + ('{"spam', 'Unterminated string starting at', 1), + ('{"spam"', "Expecting ':' delimiter", 7), + ('{"spam":', 'Expecting value', 8), + ('{"spam":42', "Expecting ',' delimiter", 10), + ('{"spam":42,', 'Expecting property name enclosed in double quotes', + 11), + ('"', 'Unterminated string starting at', 0), + ('"spam', 'Unterminated string starting at', 0), + ('[,', "Expecting value", 1), + ] + for data, msg, idx in test_cases: + try: + json.loads(data) + except json.JSONDecodeError: + e = sys.exc_info()[1] + self.assertEqual( + e.msg[:len(msg)], + msg, + "%r doesn't start with %r for %r" % (e.msg, msg, data)) + self.assertEqual( + e.pos, idx, + "pos %r != %r for %r" % (e.pos, idx, data)) + except Exception: + e = 
sys.exc_info()[1] + self.fail("Unexpected exception raised %r %s" % (e, e)) + else: + self.fail("Unexpected success parsing '%r'" % (data,)) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py new file mode 100644 index 0000000..e382ec2 --- /dev/null +++ b/simplejson/tests/test_float.py @@ -0,0 +1,35 @@ +import math +from unittest import TestCase +from simplejson.compat import long_type, text_type +import simplejson as json +from simplejson.decoder import NaN, PosInf, NegInf + +class TestFloat(TestCase): + def test_degenerates_allow(self): + for inf in (PosInf, NegInf): + self.assertEqual(json.loads(json.dumps(inf)), inf) + # Python 2.5 doesn't have math.isnan + nan = json.loads(json.dumps(NaN)) + self.assertTrue((0 + nan) != nan) + + def test_degenerates_ignore(self): + for f in (PosInf, NegInf, NaN): + self.assertEqual(json.loads(json.dumps(f, ignore_nan=True)), None) + + def test_degenerates_deny(self): + for f in (PosInf, NegInf, NaN): + self.assertRaises(ValueError, json.dumps, f, allow_nan=False) + + def test_floats(self): + for num in [1617161771.7650001, math.pi, math.pi**100, + math.pi**-100, 3.1]: + self.assertEqual(float(json.dumps(num)), num) + self.assertEqual(json.loads(json.dumps(num)), num) + self.assertEqual(json.loads(text_type(json.dumps(num))), num) + + def test_ints(self): + for num in [1, long_type(1), 1<<32, 1<<64]: + self.assertEqual(json.dumps(num), str(num)) + self.assertEqual(int(json.dumps(num)), num) + self.assertEqual(json.loads(json.dumps(num)), num) + self.assertEqual(json.loads(text_type(json.dumps(num))), num) diff --git a/simplejson/tests/test_for_json.py b/simplejson/tests/test_for_json.py new file mode 100644 index 0000000..b791b88 --- /dev/null +++ b/simplejson/tests/test_for_json.py @@ -0,0 +1,97 @@ +import unittest +import simplejson as json + + +class ForJson(object): + def for_json(self): + return {'for_json': 1} + + +class NestedForJson(object): + def for_json(self): + return {'nested': ForJson()} + + +class ForJsonList(object): + def for_json(self): + return ['list'] + + +class DictForJson(dict): + def for_json(self): + return {'alpha': 1} + + +class ListForJson(list): + def for_json(self): + return ['list'] + + +class TestForJson(unittest.TestCase): + def assertRoundTrip(self, obj, other, for_json=True): + if for_json is None: + # None will use the default + s = json.dumps(obj) + else: + s = json.dumps(obj, for_json=for_json) + self.assertEqual( + json.loads(s), + other) + + def test_for_json_encodes_stand_alone_object(self): + self.assertRoundTrip( + ForJson(), + ForJson().for_json()) + + def test_for_json_encodes_object_nested_in_dict(self): + self.assertRoundTrip( + {'hooray': ForJson()}, + {'hooray': ForJson().for_json()}) + + def test_for_json_encodes_object_nested_in_list_within_dict(self): + self.assertRoundTrip( + {'list': [0, ForJson(), 2, 3]}, + {'list': [0, ForJson().for_json(), 2, 3]}) + + def test_for_json_encodes_object_nested_within_object(self): + self.assertRoundTrip( + NestedForJson(), + {'nested': {'for_json': 1}}) + + def test_for_json_encodes_list(self): + self.assertRoundTrip( + ForJsonList(), + ForJsonList().for_json()) + + def test_for_json_encodes_list_within_object(self): + self.assertRoundTrip( + {'nested': ForJsonList()}, + {'nested': ForJsonList().for_json()}) + + def test_for_json_encodes_dict_subclass(self): + self.assertRoundTrip( + DictForJson(a=1), + DictForJson(a=1).for_json()) + + def test_for_json_encodes_list_subclass(self): + self.assertRoundTrip( + ListForJson(['l']), + 
ListForJson(['l']).for_json()) + + def test_for_json_ignored_if_not_true_with_dict_subclass(self): + for for_json in (None, False): + self.assertRoundTrip( + DictForJson(a=1), + {'a': 1}, + for_json=for_json) + + def test_for_json_ignored_if_not_true_with_list_subclass(self): + for for_json in (None, False): + self.assertRoundTrip( + ListForJson(['l']), + ['l'], + for_json=for_json) + + def test_raises_typeerror_if_for_json_not_true_with_object(self): + self.assertRaises(TypeError, json.dumps, ForJson()) + self.assertRaises(TypeError, json.dumps, ForJson(), for_json=False) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py new file mode 100644 index 0000000..cea25a5 --- /dev/null +++ b/simplejson/tests/test_indent.py @@ -0,0 +1,86 @@ +from unittest import TestCase +import textwrap + +import simplejson as json +from simplejson.compat import StringIO + +class TestIndent(TestCase): + def test_indent(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', + 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + \t[ + \t\t"blorpie" + \t], + \t[ + \t\t"whoops" + \t], + \t[], + \t"d-shtaeou", + \t"d-nthiouh", + \t"i-vhbjkhnth", + \t{ + \t\t"nifty": 87 + \t}, + \t{ + \t\t"field": "yes", + \t\t"morefield": false + \t} + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent='\t', sort_keys=True, separators=(',', ': ')) + d3 = json.dumps(h, indent=' ', sort_keys=True, separators=(',', ': ')) + d4 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + h3 = json.loads(d3) + h4 = json.loads(d4) + + self.assertEqual(h1, h) + self.assertEqual(h2, h) + self.assertEqual(h3, h) + self.assertEqual(h4, h) + self.assertEqual(d3, expect.replace('\t', ' ')) + self.assertEqual(d4, expect.replace('\t', ' ')) + # NOTE: Python 2.4 textwrap.dedent converts tabs to spaces, + # so the following is expected to fail. Python 2.4 is not a + # supported platform in simplejson 2.1.0+. 
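+        # (d2 was built with a literal tab indent, so it is compared
+        # against expect verbatim; d3 and d4 above were compared with
+        # each tab in expect swapped for spaces.)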
+ self.assertEqual(d2, expect) + + def test_indent0(self): + h = {3: 1} + def check(indent, expected): + d1 = json.dumps(h, indent=indent) + self.assertEqual(d1, expected) + + sio = StringIO() + json.dump(h, sio, indent=indent) + self.assertEqual(sio.getvalue(), expected) + + # indent=0 should emit newlines + check(0, '{\n"3": 1\n}') + # indent=None is more compact + check(None, '{"3": 1}') + + def test_separators(self): + lst = [1,2,3,4] + expect = '[\n1,\n2,\n3,\n4\n]' + expect_spaces = '[\n1, \n2, \n3, \n4\n]' + # Ensure that separators still works + self.assertEqual( + expect_spaces, + json.dumps(lst, indent=0, separators=(', ', ': '))) + # Force the new defaults + self.assertEqual( + expect, + json.dumps(lst, indent=0, separators=(',', ': '))) + # Added in 2.1.4 + self.assertEqual( + expect, + json.dumps(lst, indent=0)) diff --git a/simplejson/tests/test_item_sort_key.py b/simplejson/tests/test_item_sort_key.py new file mode 100644 index 0000000..b05bfc8 --- /dev/null +++ b/simplejson/tests/test_item_sort_key.py @@ -0,0 +1,20 @@ +from unittest import TestCase + +import simplejson as json +from operator import itemgetter + +class TestItemSortKey(TestCase): + def test_simple_first(self): + a = {'a': 1, 'c': 5, 'jack': 'jill', 'pick': 'axe', 'array': [1, 5, 6, 9], 'tuple': (83, 12, 3), 'crate': 'dog', 'zeak': 'oh'} + self.assertEqual( + '{"a": 1, "c": 5, "crate": "dog", "jack": "jill", "pick": "axe", "zeak": "oh", "array": [1, 5, 6, 9], "tuple": [83, 12, 3]}', + json.dumps(a, item_sort_key=json.simple_first)) + + def test_case(self): + a = {'a': 1, 'c': 5, 'Jack': 'jill', 'pick': 'axe', 'Array': [1, 5, 6, 9], 'tuple': (83, 12, 3), 'crate': 'dog', 'zeak': 'oh'} + self.assertEqual( + '{"Array": [1, 5, 6, 9], "Jack": "jill", "a": 1, "c": 5, "crate": "dog", "pick": "axe", "tuple": [83, 12, 3], "zeak": "oh"}', + json.dumps(a, item_sort_key=itemgetter(0))) + self.assertEqual( + '{"a": 1, "Array": [1, 5, 6, 9], "c": 5, "crate": "dog", "Jack": "jill", "pick": "axe", "tuple": [83, 12, 3], "zeak": "oh"}', + json.dumps(a, item_sort_key=lambda kv: kv[0].lower())) diff --git a/simplejson/tests/test_namedtuple.py b/simplejson/tests/test_namedtuple.py new file mode 100644 index 0000000..4387894 --- /dev/null +++ b/simplejson/tests/test_namedtuple.py @@ -0,0 +1,122 @@ +from __future__ import absolute_import +import unittest +import simplejson as json +from simplejson.compat import StringIO + +try: + from collections import namedtuple +except ImportError: + class Value(tuple): + def __new__(cls, *args): + return tuple.__new__(cls, args) + + def _asdict(self): + return {'value': self[0]} + class Point(tuple): + def __new__(cls, *args): + return tuple.__new__(cls, args) + + def _asdict(self): + return {'x': self[0], 'y': self[1]} +else: + Value = namedtuple('Value', ['value']) + Point = namedtuple('Point', ['x', 'y']) + +class DuckValue(object): + def __init__(self, *args): + self.value = Value(*args) + + def _asdict(self): + return self.value._asdict() + +class DuckPoint(object): + def __init__(self, *args): + self.point = Point(*args) + + def _asdict(self): + return self.point._asdict() + +class DeadDuck(object): + _asdict = None + +class DeadDict(dict): + _asdict = None + +CONSTRUCTORS = [ + lambda v: v, + lambda v: [v], + lambda v: [{'key': v}], +] + +class TestNamedTuple(unittest.TestCase): + def test_namedtuple_dumps(self): + for v in [Value(1), Point(1, 2), DuckValue(1), DuckPoint(1, 2)]: + d = v._asdict() + self.assertEqual(d, json.loads(json.dumps(v))) + self.assertEqual( + d, + 
json.loads(json.dumps(v, namedtuple_as_object=True)))
+            self.assertEqual(d, json.loads(json.dumps(v, tuple_as_array=False)))
+            self.assertEqual(
+                d,
+                json.loads(json.dumps(v, namedtuple_as_object=True,
+                    tuple_as_array=False)))
+
+    def test_namedtuple_dumps_false(self):
+        for v in [Value(1), Point(1, 2)]:
+            l = list(v)
+            self.assertEqual(
+                l,
+                json.loads(json.dumps(v, namedtuple_as_object=False)))
+            self.assertRaises(TypeError, json.dumps, v,
+                tuple_as_array=False, namedtuple_as_object=False)
+
+    def test_namedtuple_dump(self):
+        for v in [Value(1), Point(1, 2), DuckValue(1), DuckPoint(1, 2)]:
+            d = v._asdict()
+            sio = StringIO()
+            json.dump(v, sio)
+            self.assertEqual(d, json.loads(sio.getvalue()))
+            sio = StringIO()
+            json.dump(v, sio, namedtuple_as_object=True)
+            self.assertEqual(
+                d,
+                json.loads(sio.getvalue()))
+            sio = StringIO()
+            json.dump(v, sio, tuple_as_array=False)
+            self.assertEqual(d, json.loads(sio.getvalue()))
+            sio = StringIO()
+            json.dump(v, sio, namedtuple_as_object=True,
+                tuple_as_array=False)
+            self.assertEqual(
+                d,
+                json.loads(sio.getvalue()))
+
+    def test_namedtuple_dump_false(self):
+        for v in [Value(1), Point(1, 2)]:
+            l = list(v)
+            sio = StringIO()
+            json.dump(v, sio, namedtuple_as_object=False)
+            self.assertEqual(
+                l,
+                json.loads(sio.getvalue()))
+            self.assertRaises(TypeError, json.dump, v, StringIO(),
+                tuple_as_array=False, namedtuple_as_object=False)
+
+    def test_asdict_not_callable_dump(self):
+        for f in CONSTRUCTORS:
+            self.assertRaises(TypeError,
+                json.dump, f(DeadDuck()), StringIO(), namedtuple_as_object=True)
+            sio = StringIO()
+            json.dump(f(DeadDict()), sio, namedtuple_as_object=True)
+            self.assertEqual(
+                json.dumps(f({})),
+                sio.getvalue())
+
+    def test_asdict_not_callable_dumps(self):
+        for f in CONSTRUCTORS:
+            self.assertRaises(TypeError,
+                json.dumps, f(DeadDuck()), namedtuple_as_object=True)
+            self.assertEqual(
+                json.dumps(f({})),
+                json.dumps(f(DeadDict()), namedtuple_as_object=True))
diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py
new file mode 100644
index 0000000..f0b5b10
--- /dev/null
+++ b/simplejson/tests/test_pass1.py
@@ -0,0 +1,71 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass1.json
+JSON = r'''
+[
+    "JSON Test Pattern pass1",
+    {"object with 1 member":["array with 1 element"]},
+    {},
+    [],
+    -42,
+    true,
+    false,
+    null,
+    {
+        "integer": 1234567890,
+        "real": -9876.543210,
+        "e": 0.123456789e-12,
+        "E": 1.234567890E+34,
+        "": 23456789012E66,
+        "zero": 0,
+        "one": 1,
+        "space": " ",
+        "quote": "\"",
+        "backslash": "\\",
+        "controls": "\b\f\n\r\t",
+        "slash": "/ & \/",
+        "alpha": "abcdefghijklmnopqrstuvwyz",
+        "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
+        "digit": "0123456789",
+        "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
+        "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
+        "true": true,
+        "false": false,
+        "null": null,
+        "array":[ ],
+        "object":{ },
+        "address": "50 St. James Street",
+        "url": "http://www.JSON.org/",
+        "comment": "// /* <!-- --",
+        "# -- --> */": " ",
+        " s p a c e d " :[1,2 , 3
+
+,
+
+4 , 5 , 6 ,7 ],"compact": [1,2,3,4,5,6,7],
+        "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
+        "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
+        "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
+: "A key can be any string" + }, + 0.5 ,98.6 +, +99.44 +, + +1066, +1e1, +0.1e1, +1e-1, +1e00,2e+00,2e-00 +,"rosebud"] +''' + +class TestPass1(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEqual(res, json.loads(out)) diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py new file mode 100644 index 0000000..5d812b3 --- /dev/null +++ b/simplejson/tests/test_pass2.py @@ -0,0 +1,14 @@ +from unittest import TestCase +import simplejson as json + +# from http://json.org/JSON_checker/test/pass2.json +JSON = r''' +[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] +''' + +class TestPass2(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEqual(res, json.loads(out)) diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py new file mode 100644 index 0000000..821d60b --- /dev/null +++ b/simplejson/tests/test_pass3.py @@ -0,0 +1,20 @@ +from unittest import TestCase + +import simplejson as json + +# from http://json.org/JSON_checker/test/pass3.json +JSON = r''' +{ + "JSON Test Pattern pass3": { + "The outermost value": "must be an object or array.", + "In this test": "It is an object." + } +} +''' + +class TestPass3(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEqual(res, json.loads(out)) diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py new file mode 100644 index 0000000..662eb66 --- /dev/null +++ b/simplejson/tests/test_recursion.py @@ -0,0 +1,67 @@ +from unittest import TestCase + +import simplejson as json + +class JSONTestObject: + pass + + +class RecursiveJSONEncoder(json.JSONEncoder): + recurse = False + def default(self, o): + if o is JSONTestObject: + if self.recurse: + return [JSONTestObject] + else: + return 'JSONTestObject' + return json.JSONEncoder.default(o) + + +class TestRecursion(TestCase): + def test_listrecursion(self): + x = [] + x.append(x) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on list recursion") + x = [] + y = [x] + x.append(y) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on alternating list recursion") + y = [] + x = [y, y] + # ensure that the marker is cleared + json.dumps(x) + + def test_dictrecursion(self): + x = {} + x["test"] = x + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on dict recursion") + x = {} + y = {"a": x, "b": x} + # ensure that the marker is cleared + json.dumps(y) + + def test_defaultrecursion(self): + enc = RecursiveJSONEncoder() + self.assertEqual(enc.encode(JSONTestObject), '"JSONTestObject"') + enc.recurse = True + try: + enc.encode(JSONTestObject) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on default recursion") diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py new file mode 100644 index 0000000..3d98f0d --- /dev/null +++ b/simplejson/tests/test_scanstring.py @@ -0,0 +1,194 @@ +import sys +from unittest import TestCase + +import simplejson as json +import simplejson.decoder +from simplejson.compat import b, PY3 + +class TestScanString(TestCase): + # The bytes type is intentionally not used in most of these tests + # under Python 3 because the decoder immediately coerces to str before + # 
calling scanstring. In Python 2 we are testing the code paths + # for both unicode and str. + # + # The reason this is done is because Python 3 would require + # entirely different code paths for parsing bytes and str. + # + def test_py_scanstring(self): + self._test_scanstring(simplejson.decoder.py_scanstring) + + def test_c_scanstring(self): + if not simplejson.decoder.c_scanstring: + return + self._test_scanstring(simplejson.decoder.c_scanstring) + + def _test_scanstring(self, scanstring): + if sys.maxunicode == 65535: + self.assertEqual( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 6)) + else: + self.assertEqual( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 5)) + + self.assertEqual( + scanstring('"\\u007b"', 1, None, True), + (u'{', 8)) + + self.assertEqual( + scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True), + (u'A JSON payload should be an object or array, not a string.', 60)) + + self.assertEqual( + scanstring('["Unclosed array"', 2, None, True), + (u'Unclosed array', 17)) + + self.assertEqual( + scanstring('["extra comma",]', 2, None, True), + (u'extra comma', 14)) + + self.assertEqual( + scanstring('["double extra comma",,]', 2, None, True), + (u'double extra comma', 21)) + + self.assertEqual( + scanstring('["Comma after the close"],', 2, None, True), + (u'Comma after the close', 24)) + + self.assertEqual( + scanstring('["Extra close"]]', 2, None, True), + (u'Extra close', 14)) + + self.assertEqual( + scanstring('{"Extra comma": true,}', 2, None, True), + (u'Extra comma', 14)) + + self.assertEqual( + scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), + (u'Extra value after close', 26)) + + self.assertEqual( + scanstring('{"Illegal expression": 1 + 2}', 2, None, True), + (u'Illegal expression', 21)) + + self.assertEqual( + scanstring('{"Illegal invocation": alert()}', 2, None, True), + (u'Illegal invocation', 21)) + + self.assertEqual( + scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True), + (u'Numbers cannot have leading zeroes', 37)) + + self.assertEqual( + scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), + (u'Numbers cannot be hex', 24)) + + self.assertEqual( + scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), + (u'Too deep', 30)) + + self.assertEqual( + scanstring('{"Missing colon" null}', 2, None, True), + (u'Missing colon', 16)) + + self.assertEqual( + scanstring('{"Double colon":: null}', 2, None, True), + (u'Double colon', 15)) + + self.assertEqual( + scanstring('{"Comma instead of colon", null}', 2, None, True), + (u'Comma instead of colon', 25)) + + self.assertEqual( + scanstring('["Colon instead of comma": false]', 2, None, True), + (u'Colon instead of comma', 25)) + + self.assertEqual( + scanstring('["Bad value", truth]', 2, None, True), + (u'Bad value', 12)) + + for c in map(chr, range(0x00, 0x1f)): + self.assertEqual( + scanstring(c + '"', 0, None, False), + (c, 2)) + self.assertRaises( + ValueError, + scanstring, c + '"', 0, None, True) + + self.assertRaises(ValueError, scanstring, '', 0, None, True) + self.assertRaises(ValueError, scanstring, 'a', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u0', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u01', 0, None, True) + self.assertRaises(ValueError, scanstring, '\\u012', 0, 
None, True) + self.assertRaises(ValueError, scanstring, '\\u0123', 0, None, True) + if sys.maxunicode > 65535: + self.assertRaises(ValueError, + scanstring, '\\ud834\\u"', 0, None, True) + self.assertRaises(ValueError, + scanstring, '\\ud834\\x0123"', 0, None, True) + + def test_issue3623(self): + self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, + "xxx") + self.assertRaises(UnicodeDecodeError, + json.encoder.encode_basestring_ascii, b("xx\xff")) + + def test_overflow(self): + # Python 2.5 does not have maxsize, Python 3 does not have maxint + maxsize = getattr(sys, 'maxsize', getattr(sys, 'maxint', None)) + assert maxsize is not None + self.assertRaises(OverflowError, json.decoder.scanstring, "xxx", + maxsize + 1) + + def test_surrogates(self): + scanstring = json.decoder.scanstring + + def assertScan(given, expect, test_utf8=True): + givens = [given] + if not PY3 and test_utf8: + givens.append(given.encode('utf8')) + for given in givens: + (res, count) = scanstring(given, 1, None, True) + self.assertEqual(len(given), count) + self.assertEqual(res, expect) + + assertScan( + u'"z\\ud834\\u0079x"', + u'z\ud834yx') + assertScan( + u'"z\\ud834\\udd20x"', + u'z\U0001d120x') + assertScan( + u'"z\\ud834\\ud834\\udd20x"', + u'z\ud834\U0001d120x') + assertScan( + u'"z\\ud834x"', + u'z\ud834x') + assertScan( + u'"z\\udd20x"', + u'z\udd20x') + assertScan( + u'"z\ud834x"', + u'z\ud834x') + # It may look strange to join strings together, but Python is drunk. + # https://gist.github.com/etrepum/5538443 + assertScan( + u'"z\\ud834\udd20x12345"', + u''.join([u'z\ud834', u'\udd20x12345'])) + assertScan( + u'"z\ud834\\udd20x"', + u''.join([u'z\ud834', u'\udd20x'])) + # these have different behavior given UTF8 input, because the surrogate + # pair may be joined (in maxunicode > 65535 builds) + assertScan( + u''.join([u'"z\ud834', u'\udd20x"']), + u''.join([u'z\ud834', u'\udd20x']), + test_utf8=False) + + self.assertRaises(ValueError, + scanstring, u'"z\\ud83x"', 1, None, True) + self.assertRaises(ValueError, + scanstring, u'"z\\ud834\\udd2x"', 1, None, True) diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py new file mode 100644 index 0000000..91b4d4f --- /dev/null +++ b/simplejson/tests/test_separators.py @@ -0,0 +1,42 @@ +import textwrap +from unittest import TestCase + +import simplejson as json + + +class TestSeparators(TestCase): + def test_separators(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ] , + [ + "whoops" + ] , + [] , + "d-shtaeou" , + "d-nthiouh" , + "i-vhbjkhnth" , + { + "nifty" : 87 + } , + { + "field" : "yes" , + "morefield" : false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=' ', sort_keys=True, separators=(' ,', ' : ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEqual(h1, h) + self.assertEqual(h2, h) + self.assertEqual(d2, expect) diff --git a/simplejson/tests/test_speedups.py b/simplejson/tests/test_speedups.py new file mode 100644 index 0000000..825ecf2 --- /dev/null +++ b/simplejson/tests/test_speedups.py @@ -0,0 +1,20 @@ +from unittest import TestCase + +from simplejson import encoder, scanner + +def has_speedups(): + return encoder.c_make_encoder is not None + +class TestDecode(TestCase): + def test_make_scanner(self): + if not has_speedups(): + return + self.assertRaises(AttributeError, scanner.c_make_scanner, 1) + + def test_make_encoder(self): 
+ if not has_speedups(): + return + self.assertRaises(TypeError, encoder.c_make_encoder, + None, + "\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75", + None) diff --git a/simplejson/tests/test_tool.py b/simplejson/tests/test_tool.py new file mode 100644 index 0000000..ac2a14c --- /dev/null +++ b/simplejson/tests/test_tool.py @@ -0,0 +1,97 @@ +from __future__ import with_statement +import os +import sys +import textwrap +import unittest +import subprocess +import tempfile +try: + # Python 3.x + from test.support import strip_python_stderr +except ImportError: + # Python 2.6+ + try: + from test.test_support import strip_python_stderr + except ImportError: + # Python 2.5 + import re + def strip_python_stderr(stderr): + return re.sub( + r"\[\d+ refs\]\r?\n?$".encode(), + "".encode(), + stderr).strip() + +class TestTool(unittest.TestCase): + data = """ + + [["blorpie"],[ "whoops" ] , [ + ],\t"d-shtaeou",\r"d-nthiouh", + "i-vhbjkhnth", {"nifty":87}, {"morefield" :\tfalse,"field" + :"yes"} ] + """ + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ], + [ + "whoops" + ], + [], + "d-shtaeou", + "d-nthiouh", + "i-vhbjkhnth", + { + "nifty": 87 + }, + { + "field": "yes", + "morefield": false + } + ] + """) + + def runTool(self, args=None, data=None): + argv = [sys.executable, '-m', 'simplejson.tool'] + if args: + argv.extend(args) + proc = subprocess.Popen(argv, + stdin=subprocess.PIPE, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE) + out, err = proc.communicate(data) + self.assertEqual(strip_python_stderr(err), ''.encode()) + self.assertEqual(proc.returncode, 0) + return out + + def test_stdin_stdout(self): + self.assertEqual( + self.runTool(data=self.data.encode()), + self.expect.encode()) + + def test_infile_stdout(self): + with tempfile.NamedTemporaryFile() as infile: + infile.write(self.data.encode()) + infile.flush() + self.assertEqual( + self.runTool(args=[infile.name]), + self.expect.encode()) + + def test_infile_outfile(self): + with tempfile.NamedTemporaryFile() as infile: + infile.write(self.data.encode()) + infile.flush() + # outfile will get overwritten by tool, so the delete + # may not work on some platforms. Do it manually. 
+ outfile = tempfile.NamedTemporaryFile() + try: + self.assertEqual( + self.runTool(args=[infile.name, outfile.name]), + ''.encode()) + with open(outfile.name, 'rb') as f: + self.assertEqual(f.read(), self.expect.encode()) + finally: + outfile.close() + if os.path.exists(outfile.name): + os.unlink(outfile.name) diff --git a/simplejson/tests/test_tuple.py b/simplejson/tests/test_tuple.py new file mode 100644 index 0000000..a6a9910 --- /dev/null +++ b/simplejson/tests/test_tuple.py @@ -0,0 +1,51 @@ +import unittest + +from simplejson.compat import StringIO +import simplejson as json + +class TestTuples(unittest.TestCase): + def test_tuple_array_dumps(self): + t = (1, 2, 3) + expect = json.dumps(list(t)) + # Default is True + self.assertEqual(expect, json.dumps(t)) + self.assertEqual(expect, json.dumps(t, tuple_as_array=True)) + self.assertRaises(TypeError, json.dumps, t, tuple_as_array=False) + # Ensure that the "default" does not get called + self.assertEqual(expect, json.dumps(t, default=repr)) + self.assertEqual(expect, json.dumps(t, tuple_as_array=True, + default=repr)) + # Ensure that the "default" gets called + self.assertEqual( + json.dumps(repr(t)), + json.dumps(t, tuple_as_array=False, default=repr)) + + def test_tuple_array_dump(self): + t = (1, 2, 3) + expect = json.dumps(list(t)) + # Default is True + sio = StringIO() + json.dump(t, sio) + self.assertEqual(expect, sio.getvalue()) + sio = StringIO() + json.dump(t, sio, tuple_as_array=True) + self.assertEqual(expect, sio.getvalue()) + self.assertRaises(TypeError, json.dump, t, StringIO(), + tuple_as_array=False) + # Ensure that the "default" does not get called + sio = StringIO() + json.dump(t, sio, default=repr) + self.assertEqual(expect, sio.getvalue()) + sio = StringIO() + json.dump(t, sio, tuple_as_array=True, default=repr) + self.assertEqual(expect, sio.getvalue()) + # Ensure that the "default" gets called + sio = StringIO() + json.dump(t, sio, tuple_as_array=False, default=repr) + self.assertEqual( + json.dumps(repr(t)), + sio.getvalue()) + +class TestNamedTuple(unittest.TestCase): + def test_namedtuple_dump(self): + pass diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py new file mode 100644 index 0000000..f04cc5c --- /dev/null +++ b/simplejson/tests/test_unicode.py @@ -0,0 +1,145 @@ +import sys +from unittest import TestCase + +import simplejson as json +from simplejson.compat import unichr, text_type, b, u + +class TestUnicode(TestCase): + def test_encoding1(self): + encoder = json.JSONEncoder(encoding='utf-8') + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = encoder.encode(u) + js = encoder.encode(s) + self.assertEqual(ju, js) + + def test_encoding2(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = json.dumps(u, encoding='utf-8') + js = json.dumps(s, encoding='utf-8') + self.assertEqual(ju, js) + + def test_encoding3(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u) + self.assertEqual(j, '"\\u03b1\\u03a9"') + + def test_encoding4(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u]) + self.assertEqual(j, '["\\u03b1\\u03a9"]') + + def test_encoding5(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u, ensure_ascii=False) + self.assertEqual(j, u'"' + u + u'"') + + def test_encoding6(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = 
json.dumps([u], ensure_ascii=False) + self.assertEqual(j, u'["' + u + u'"]') + + def test_big_unicode_encode(self): + u = u'\U0001d120' + self.assertEqual(json.dumps(u), '"\\ud834\\udd20"') + self.assertEqual(json.dumps(u, ensure_ascii=False), u'"\U0001d120"') + + def test_big_unicode_decode(self): + u = u'z\U0001d120x' + self.assertEqual(json.loads('"' + u + '"'), u) + self.assertEqual(json.loads('"z\\ud834\\udd20x"'), u) + + def test_unicode_decode(self): + for i in range(0, 0xd7ff): + u = unichr(i) + #s = '"\\u{0:04x}"'.format(i) + s = '"\\u%04x"' % (i,) + self.assertEqual(json.loads(s), u) + + def test_object_pairs_hook_with_unicode(self): + s = u'{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}' + p = [(u"xkd", 1), (u"kcw", 2), (u"art", 3), (u"hxm", 4), + (u"qrt", 5), (u"pad", 6), (u"hoy", 7)] + self.assertEqual(json.loads(s), eval(s)) + self.assertEqual(json.loads(s, object_pairs_hook=lambda x: x), p) + od = json.loads(s, object_pairs_hook=json.OrderedDict) + self.assertEqual(od, json.OrderedDict(p)) + self.assertEqual(type(od), json.OrderedDict) + # the object_pairs_hook takes priority over the object_hook + self.assertEqual(json.loads(s, + object_pairs_hook=json.OrderedDict, + object_hook=lambda x: None), + json.OrderedDict(p)) + + + def test_default_encoding(self): + self.assertEqual(json.loads(u'{"a": "\xe9"}'.encode('utf-8')), + {'a': u'\xe9'}) + + def test_unicode_preservation(self): + self.assertEqual(type(json.loads(u'""')), text_type) + self.assertEqual(type(json.loads(u'"a"')), text_type) + self.assertEqual(type(json.loads(u'["a"]')[0]), text_type) + + def test_ensure_ascii_false_returns_unicode(self): + # http://code.google.com/p/simplejson/issues/detail?id=48 + self.assertEqual(type(json.dumps([], ensure_ascii=False)), text_type) + self.assertEqual(type(json.dumps(0, ensure_ascii=False)), text_type) + self.assertEqual(type(json.dumps({}, ensure_ascii=False)), text_type) + self.assertEqual(type(json.dumps("", ensure_ascii=False)), text_type) + + def test_ensure_ascii_false_bytestring_encoding(self): + # http://code.google.com/p/simplejson/issues/detail?id=48 + doc1 = {u'quux': b('Arr\xc3\xaat sur images')} + doc2 = {u'quux': u('Arr\xeat sur images')} + doc_ascii = '{"quux": "Arr\\u00eat sur images"}' + doc_unicode = u'{"quux": "Arr\xeat sur images"}' + self.assertEqual(json.dumps(doc1), doc_ascii) + self.assertEqual(json.dumps(doc2), doc_ascii) + self.assertEqual(json.dumps(doc1, ensure_ascii=False), doc_unicode) + self.assertEqual(json.dumps(doc2, ensure_ascii=False), doc_unicode) + + def test_ensure_ascii_linebreak_encoding(self): + # http://timelessrepo.com/json-isnt-a-javascript-subset + s1 = u'\u2029\u2028' + s2 = s1.encode('utf8') + expect = '"\\u2029\\u2028"' + self.assertEqual(json.dumps(s1), expect) + self.assertEqual(json.dumps(s2), expect) + self.assertEqual(json.dumps(s1, ensure_ascii=False), expect) + self.assertEqual(json.dumps(s2, ensure_ascii=False), expect) + + def test_invalid_escape_sequences(self): + # incomplete escape sequence + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1234') + # invalid escape sequence + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123x"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12x4"') + 
self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1x34"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ux234"') + if sys.maxunicode > 65535: + # invalid escape sequence for low surrogate + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000x"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00x0"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0x00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ux000"') + + def test_ensure_ascii_still_works(self): + # in the ascii range, ensure that everything is the same + for c in map(unichr, range(0, 127)): + self.assertEqual( + json.dumps(c, ensure_ascii=False), + json.dumps(c)) + snowman = u'\N{SNOWMAN}' + self.assertEqual( + json.dumps(c + snowman, ensure_ascii=False), + '"' + c + snowman + '"') diff --git a/simplejson/tool.py b/simplejson/tool.py new file mode 100644 index 0000000..062e8e2 --- /dev/null +++ b/simplejson/tool.py @@ -0,0 +1,42 @@ +r"""Command-line tool to validate and pretty-print JSON + +Usage:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) + +""" +from __future__ import with_statement +import sys +import simplejson as json + +def main(): + if len(sys.argv) == 1: + infile = sys.stdin + outfile = sys.stdout + elif len(sys.argv) == 2: + infile = open(sys.argv[1], 'r') + outfile = sys.stdout + elif len(sys.argv) == 3: + infile = open(sys.argv[1], 'r') + outfile = open(sys.argv[2], 'w') + else: + raise SystemExit(sys.argv[0] + " [infile [outfile]]") + with infile: + try: + obj = json.load(infile, + object_pairs_hook=json.OrderedDict, + use_decimal=True) + except ValueError: + raise SystemExit(sys.exc_info()[1]) + with outfile: + json.dump(obj, outfile, sort_keys=True, indent='    ', use_decimal=True) + outfile.write('\n') + + +if __name__ == '__main__': + main() diff --git a/ssh_config b/ssh_config new file mode 100644 index 0000000..4e93034 --- /dev/null +++ b/ssh_config @@ -0,0 +1,5 @@ +Host * +User irisaple_pacemaker2 +StrictHostKeyChecking no +LogLevel QUIET +BatchMode yes
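A note on the encoder flags that the test_tuple.py and namedtuple tests in this patch exercise: tuples encode as JSON arrays by default (tuple_as_array=True), namedtuples encode as objects through their _asdict() method (namedtuple_as_object=True), and disabling the relevant flag pushes the value into the default fallback or, failing that, a TypeError. A minimal sketch of that behavior, assuming only that the vendored simplejson above is importable (Point here is a stand-in for the namedtuple the tests define):

from collections import namedtuple
import simplejson as json

# Hypothetical namedtuple, mirroring the Point used in test_tuple.py.
Point = namedtuple('Point', ['x', 'y'])
p = Point(1, 2)

print(json.dumps((1, 2, 3)))                      # '[1, 2, 3]'  (tuple_as_array defaults to True)
print(json.dumps(p))                              # '{"x": 1, "y": 2}'  (via p._asdict())
print(json.dumps(p, namedtuple_as_object=False))  # '[1, 2]'  (falls back to tuple_as_array)
print(json.dumps((1, 2, 3), tuple_as_array=False, default=repr))  # '"(1, 2, 3)"'  (default takes over)
# With tuple_as_array=False, namedtuple_as_object=False, and no default,
# dumping either value raises TypeError, exactly as the *_false tests assert.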

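Similarly, the surrogate cases in test_scanstring.py and test_unicode.py reduce to a simple rule: an escaped UTF-16 surrogate pair is combined into a single astral code point, a lone surrogate passes through unpaired, and an astral character is re-escaped as a pair when ensure_ascii is on. A short illustration under the same assumption (results shown for a wide, maxunicode > 65535, build):

import simplejson as json

print(json.loads('"z\\ud834\\udd20x"'))  # u'z\U0001d120x' -- escaped pair combines
print(json.loads('"z\\ud834x"'))         # u'z\ud834x' -- lone high surrogate kept as-is
print(json.dumps(u'\U0001d120'))         # '"\\ud834\\udd20"' -- re-escaped as a surrogate pair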