<component name="InspectionProjectProfileManager"> | |
<settings> | |
<option name="useProjectProfile" value="false" /> | |
<option name="USE_PROJECT_PROFILE" value="false" /> | |
<version value="1.0" /> | |
</settings> | |
</component> |
<component name="libraryTable"> | |
<library name="R User Library"> | |
<CLASSES /> | |
<SOURCES /> | |
</library> | |
</component> |
<?xml version="1.0" encoding="UTF-8"?> | |
<project version="4"> | |
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (pygametest)" project-jdk-type="Python SDK" /> | |
</project> |
<?xml version="1.0" encoding="UTF-8"?> | |
<project version="4"> | |
<component name="ProjectModuleManager"> | |
<modules> | |
<module fileurl="file://$PROJECT_DIR$/.idea/pygametest.iml" filepath="$PROJECT_DIR$/.idea/pygametest.iml" /> | |
</modules> | |
</component> | |
</project> |
<?xml version="1.0" encoding="UTF-8"?> | |
<module type="PYTHON_MODULE" version="4"> | |
<component name="NewModuleRootManager"> | |
<content url="file://$MODULE_DIR$"> | |
<excludeFolder url="file://$MODULE_DIR$/venv" /> | |
</content> | |
<orderEntry type="jdk" jdkName="Python 3.6 (pygametest)" jdkType="Python SDK" /> | |
<orderEntry type="sourceFolder" forTests="false" /> | |
<orderEntry type="library" name="R User Library" level="project" /> | |
<orderEntry type="library" name="R Skeletons" level="application" /> | |
</component> | |
</module> |
# -*- coding:utf-8 -*- | |
import pygame | |
from pygame.locals import * | |
import sys | |
import random | |
def main():
    (w, h) = (640, 480)  # screen size
    (x, y) = (w / 2, h / 2)
    pygame.init()
    screen = pygame.display.set_mode((w, h))
    pygame.display.set_caption("NEKO GAME")
    font = pygame.font.Font(None, 55)
    backimg = pygame.image.load("bg.png").convert()
    playerimg = pygame.image.load("player.png").convert_alpha()
    while True:
        pygame.display.update()
        pygame.time.delay(30)
        screen.fill((0, 0, 0))
        screen.blit(backimg, (0, 0))
        screen.blit(playerimg, (x, y))
        # Render the caption text in a random colour each frame.
        text = font.render("NEKO", True, (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)))
        # pygame.draw.circle(screen, (255, 51, 51), (random.randint(0, h), random.randint(0, h)), 15, 0)
        screen.blit(text, [10, 10])
        for event in pygame.event.get():
            if event.type == MOUSEMOTION:
                # Centre the player sprite on the mouse cursor.
                x, y = event.pos
                x -= playerimg.get_width() / 2
                y -= playerimg.get_height() / 2
            if event.type == QUIT:
                pygame.quit()
                sys.exit()


if __name__ == "__main__":
    main()
/* | |
pygame - Python Game Library | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free | |
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/ | |
#ifndef _CAMERA_H | |
#define _CAMERA_H | |
#include "_pygame.h" | |
#include "camera.h" | |
#endif | |
/* | |
pygame - Python Game Library | |
Copyright (C) 2000-2001 Pete Shinners | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free | |
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Pete Shinners | |
[email protected] | |
*/ | |
#ifndef _PYGAME_H | |
#define _PYGAME_H | |
/** This header file includes all the definitions for the | |
** base pygame extensions. This header only requires | |
** SDL and Python includes. The reason for functions | |
** prototyped with #define's is to allow for maximum | |
** python portability. It also uses python as the | |
** runtime linker, which allows for late binding. For more | |
** information on this style of development, read the Python | |
** docs on this subject. | |
** http://www.python.org/doc/current/ext/using-cobjects.html | |
** | |
** If using this to build your own derived extensions, | |
** you'll see that the functions available here are mainly | |
** used to help convert between python objects and SDL objects. | |
** Since this library doesn't add a lot of functionality to | |
** the SDL library, it doesn't need to offer a lot either.
** | |
** When initializing your extension module, you must manually | |
** import the modules you want to use. (this is the part about | |
** using python as the runtime linker). Each module has its | |
** own import_xxx() routine. You need to perform this import | |
** after you have initialized your own module, and before | |
** you call any routines from that module. Since every module | |
** in pygame does this, there are plenty of examples. | |
** | |
** The base module does include some useful conversion routines | |
** that you are free to use in your own extension. | |
** | |
** When making changes, it is very important to keep the | |
** FIRSTSLOT and NUMSLOT constants up to date for each | |
** section. Also be sure not to overlap any of the slots. | |
** When you do make a mistake with this, it will result
** in a dereferenced NULL pointer that is easier to diagnose
** than it could be :] | |
**/ | |
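/* A minimal sketch of the import pattern described above (the module and
 * function names here are hypothetical, not part of pygame). The import_xxx()
 * call must come after your own module object exists and before any call into
 * that module's C API; a failed import leaves a Python exception set:
 *
 *     static PyMethodDef my_methods[] = { {NULL, NULL, 0, NULL} };
 *
 *     PyMODINIT_FUNC
 *     initmyext(void)                     // Python 2 style init shown here
 *     {
 *         PyObject *module = Py_InitModule("myext", my_methods);
 *         if (module == NULL)
 *             return;
 *         import_pygame_base();           // fills the BASE slots of PyGAME_C_API
 *         import_pygame_surface();        // needed before using PySurface_* helpers
 *         if (PyErr_Occurred())
 *             return;                     // propagate the import failure
 *     }
 */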
#if defined(HAVE_SNPRINTF) /* defined in python.h (pyerrors.h) and SDL.h (SDL_config.h) */ | |
#undef HAVE_SNPRINTF /* remove GCC redefine warning */ | |
#endif | |
// This must be before all else | |
#if defined(__SYMBIAN32__) && defined( OPENC ) | |
#include <sys/types.h> | |
#if defined(__WINS__) | |
void* _alloca(size_t size); | |
# define alloca _alloca | |
#endif | |
#endif | |
/* This is unconditionally defined in Python.h */ | |
#if defined(_POSIX_C_SOURCE) | |
#undef _POSIX_C_SOURCE | |
#endif | |
#include <Python.h> | |
/* CObjects vanish in Python 3.2, so we code as though we use capsules */
#if defined(Py_CAPSULE_H) | |
#define PG_HAVE_CAPSULE 1 | |
#else | |
#define PG_HAVE_CAPSULE 0 | |
#endif | |
#if defined(Py_COBJECT_H) | |
#define PG_HAVE_COBJECT 1 | |
#else | |
#define PG_HAVE_COBJECT 0 | |
#endif | |
#if !PG_HAVE_CAPSULE | |
#define PyCapsule_New(ptr, n, dfn) PyCObject_FromVoidPtr(ptr, dfn) | |
#define PyCapsule_GetPointer(obj, n) PyCObject_AsVoidPtr(obj) | |
#define PyCapsule_CheckExact(obj) PyCObject_Check(obj) | |
#endif | |
/* Pygame uses Py_buffer (PEP 3118) to exchange array information internally; | |
* define here as needed. | |
*/ | |
#if !defined(PyBUF_SIMPLE) | |
typedef struct bufferinfo { | |
void *buf; | |
PyObject *obj; | |
Py_ssize_t len; | |
Py_ssize_t itemsize; | |
int readonly; | |
int ndim; | |
char *format; | |
Py_ssize_t *shape; | |
Py_ssize_t *strides; | |
Py_ssize_t *suboffsets; | |
void *internal; | |
} Py_buffer; | |
/* Flags for getting buffers */ | |
#define PyBUF_SIMPLE 0 | |
#define PyBUF_WRITABLE 0x0001 | |
/* PyBUF_WRITEABLE (with the extra 'E') is kept as a backwards-compatible alias */
#define PyBUF_WRITEABLE PyBUF_WRITABLE | |
#define PyBUF_FORMAT 0x0004 | |
#define PyBUF_ND 0x0008 | |
#define PyBUF_STRIDES (0x0010 | PyBUF_ND) | |
#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES) | |
#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES) | |
#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES) | |
#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES) | |
#define PyBUF_CONTIG (PyBUF_ND | PyBUF_WRITABLE) | |
#define PyBUF_CONTIG_RO (PyBUF_ND) | |
#define PyBUF_STRIDED (PyBUF_STRIDES | PyBUF_WRITABLE) | |
#define PyBUF_STRIDED_RO (PyBUF_STRIDES) | |
#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_WRITABLE | PyBUF_FORMAT) | |
#define PyBUF_RECORDS_RO (PyBUF_STRIDES | PyBUF_FORMAT) | |
#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_WRITABLE | PyBUF_FORMAT) | |
#define PyBUF_FULL_RO (PyBUF_INDIRECT | PyBUF_FORMAT) | |
#define PyBUF_READ 0x100 | |
#define PyBUF_WRITE 0x200 | |
#define PyBUF_SHADOW 0x400 | |
typedef int (*getbufferproc)(PyObject *, Py_buffer *, int); | |
typedef void (*releasebufferproc)(Py_buffer *); | |
#endif /* #if !defined(PyBUF_SIMPLE) */ | |
/* Flag indicating a Pg_buffer; used for assertions within callbacks */ | |
#ifndef NDEBUG | |
#define PyBUF_PYGAME 0x4000 | |
#endif | |
#define PyBUF_HAS_FLAG(f, F) (((f) & (F)) == (F)) | |
/* Array information exchange struct C type; inherits from Py_buffer | |
* | |
* Pygame uses its own Py_buffer derived C struct as an internal representation | |
* of an imported array buffer. The extended Py_buffer allows for a | |
* per-instance release callback, | |
*/ | |
typedef void (*pybuffer_releaseproc)(Py_buffer *); | |
typedef struct pg_bufferinfo_s { | |
Py_buffer view; | |
PyObject *consumer; /* Input: Borrowed reference */ | |
pybuffer_releaseproc release_buffer; | |
} Pg_buffer; | |
/* Operating system specific adjustments | |
*/ | |
// No signal() | |
#if defined(__SYMBIAN32__) && defined(HAVE_SIGNAL_H) | |
#undef HAVE_SIGNAL_H | |
#endif | |
#if defined(HAVE_SNPRINTF) | |
#undef HAVE_SNPRINTF | |
#endif | |
#ifdef MS_WIN32 /*Python gives us MS_WIN32, SDL needs just WIN32*/ | |
#ifndef WIN32 | |
#define WIN32 | |
#endif | |
#endif | |
/// Prefix when initializing module | |
#define MODPREFIX "" | |
/// Prefix when importing module | |
#define IMPPREFIX "pygame." | |
#ifdef __SYMBIAN32__ | |
#undef MODPREFIX | |
#undef IMPPREFIX | |
// On Symbian there is no pygame package. The extensions are built-in or in sys\bin. | |
#define MODPREFIX "pygame_" | |
#define IMPPREFIX "pygame_" | |
#endif | |
#include <SDL.h> | |
/* macros used throughout the source */ | |
#define RAISE(x,y) (PyErr_SetString((x), (y)), (PyObject*)NULL) | |
#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION == 3 | |
# define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None | |
# define Py_RETURN_TRUE return Py_INCREF(Py_True), Py_True | |
# define Py_RETURN_FALSE return Py_INCREF(Py_False), Py_False | |
#endif | |
/* Py_ssize_t availability. */ | |
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) | |
typedef int Py_ssize_t; | |
#define PY_SSIZE_T_MAX INT_MAX | |
#define PY_SSIZE_T_MIN INT_MIN | |
typedef inquiry lenfunc; | |
typedef intargfunc ssizeargfunc; | |
typedef intobjargproc ssizeobjargproc; | |
typedef intintargfunc ssizessizeargfunc; | |
typedef intintobjargproc ssizessizeobjargproc; | |
typedef getreadbufferproc readbufferproc; | |
typedef getwritebufferproc writebufferproc; | |
typedef getsegcountproc segcountproc; | |
typedef getcharbufferproc charbufferproc; | |
#endif | |
#define PyType_Init(x) (((x).ob_type) = &PyType_Type) | |
#define PYGAMEAPI_LOCAL_ENTRY "_PYGAME_C_API" | |
#ifndef MIN | |
#define MIN(a,b) ((a) < (b) ? (a) : (b)) | |
#endif | |
#ifndef MAX | |
#define MAX(a,b) ( (a) > (b) ? (a) : (b)) | |
#endif | |
#ifndef ABS | |
#define ABS(a) (((a) < 0) ? -(a) : (a)) | |
#endif | |
/* test sdl initializations */ | |
#define VIDEO_INIT_CHECK() \ | |
if(!SDL_WasInit(SDL_INIT_VIDEO)) \ | |
return RAISE(PyExc_SDLError, "video system not initialized") | |
#define CDROM_INIT_CHECK() \ | |
if(!SDL_WasInit(SDL_INIT_CDROM)) \ | |
return RAISE(PyExc_SDLError, "cdrom system not initialized") | |
#define JOYSTICK_INIT_CHECK() \ | |
if(!SDL_WasInit(SDL_INIT_JOYSTICK)) \ | |
return RAISE(PyExc_SDLError, "joystick system not initialized") | |
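/* Usage sketch (hypothetical binding, not part of this header): a C-level
 * function that touches a subsystem simply opens with the matching check
 * macro, which returns NULL with SDLError set when that subsystem was never
 * initialized:
 *
 *     static PyObject*
 *     my_cursor_visible(PyObject* self, PyObject* args)
 *     {
 *         VIDEO_INIT_CHECK();   // bails out if the video system is not initialized
 *         return Py_BuildValue("i", SDL_ShowCursor(SDL_QUERY));
 *     }
 */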
/* BASE */ | |
#define VIEW_CONTIGUOUS 1 | |
#define VIEW_C_ORDER 2 | |
#define VIEW_F_ORDER 4 | |
#define PYGAMEAPI_BASE_FIRSTSLOT 0 | |
#define PYGAMEAPI_BASE_NUMSLOTS 19 | |
#ifndef PYGAMEAPI_BASE_INTERNAL | |
#define PyExc_SDLError ((PyObject*)PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT]) | |
#define PyGame_RegisterQuit \ | |
(*(void(*)(void(*)(void)))PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 1]) | |
#define IntFromObj \ | |
(*(int(*)(PyObject*, int*))PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 2]) | |
#define IntFromObjIndex \ | |
(*(int(*)(PyObject*, int, int*))PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 3]) | |
#define TwoIntsFromObj \ | |
(*(int(*)(PyObject*, int*, int*))PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 4]) | |
#define FloatFromObj \ | |
(*(int(*)(PyObject*, float*))PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 5]) | |
#define FloatFromObjIndex \ | |
(*(float(*)(PyObject*, int, float*)) \ | |
PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 6]) | |
#define TwoFloatsFromObj \ | |
(*(int(*)(PyObject*, float*, float*)) \ | |
PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 7]) | |
#define UintFromObj \ | |
(*(int(*)(PyObject*, Uint32*))PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 8]) | |
#define UintFromObjIndex \ | |
(*(int(*)(PyObject*, int, Uint32*)) \ | |
PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 9]) | |
#define PyGame_Video_AutoQuit \ | |
(*(void(*)(void))PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 10]) | |
#define PyGame_Video_AutoInit \ | |
(*(int(*)(void))PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 11]) | |
#define RGBAFromObj \ | |
(*(int(*)(PyObject*, Uint8*))PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 12]) | |
#define PgBuffer_AsArrayInterface \ | |
(*(PyObject*(*)(Py_buffer*)) PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 13]) | |
#define PgBuffer_AsArrayStruct \ | |
(*(PyObject*(*)(Py_buffer*)) \ | |
PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 14]) | |
#define PgObject_GetBuffer \ | |
(*(int(*)(PyObject*, Pg_buffer*, int)) \ | |
PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 15]) | |
#define PgBuffer_Release \ | |
(*(void(*)(Pg_buffer*)) \ | |
PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 16]) | |
#define PgDict_AsBuffer \ | |
(*(int(*)(Pg_buffer*, PyObject*, int)) \ | |
PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 17]) | |
#define PgExc_BufferError \ | |
((PyObject*)PyGAME_C_API[PYGAMEAPI_BASE_FIRSTSLOT + 18]) | |
#define import_pygame_base() IMPORT_PYGAME_MODULE(base, BASE) | |
#endif | |
/* RECT */ | |
#define PYGAMEAPI_RECT_FIRSTSLOT \ | |
(PYGAMEAPI_BASE_FIRSTSLOT + PYGAMEAPI_BASE_NUMSLOTS) | |
#define PYGAMEAPI_RECT_NUMSLOTS 4 | |
typedef struct { | |
int x, y; | |
int w, h; | |
}GAME_Rect; | |
typedef struct { | |
PyObject_HEAD | |
GAME_Rect r; | |
PyObject *weakreflist; | |
} PyRectObject; | |
#define PyRect_AsRect(x) (((PyRectObject*)x)->r) | |
#ifndef PYGAMEAPI_RECT_INTERNAL | |
#define PyRect_Check(x) \ | |
((x)->ob_type == (PyTypeObject*)PyGAME_C_API[PYGAMEAPI_RECT_FIRSTSLOT + 0]) | |
#define PyRect_Type (*(PyTypeObject*)PyGAME_C_API[PYGAMEAPI_RECT_FIRSTSLOT + 0]) | |
#define PyRect_New \ | |
(*(PyObject*(*)(SDL_Rect*))PyGAME_C_API[PYGAMEAPI_RECT_FIRSTSLOT + 1]) | |
#define PyRect_New4 \ | |
(*(PyObject*(*)(int,int,int,int))PyGAME_C_API[PYGAMEAPI_RECT_FIRSTSLOT + 2]) | |
#define GameRect_FromObject \ | |
(*(GAME_Rect*(*)(PyObject*, GAME_Rect*)) \ | |
PyGAME_C_API[PYGAMEAPI_RECT_FIRSTSLOT + 3]) | |
#define import_pygame_rect() IMPORT_PYGAME_MODULE(rect, RECT) | |
#endif | |
/* CDROM */ | |
#define PYGAMEAPI_CDROM_FIRSTSLOT \ | |
(PYGAMEAPI_RECT_FIRSTSLOT + PYGAMEAPI_RECT_NUMSLOTS) | |
#define PYGAMEAPI_CDROM_NUMSLOTS 2 | |
typedef struct { | |
PyObject_HEAD | |
int id; | |
} PyCDObject; | |
#define PyCD_AsID(x) (((PyCDObject*)x)->id) | |
#ifndef PYGAMEAPI_CDROM_INTERNAL | |
#define PyCD_Check(x) \ | |
((x)->ob_type == (PyTypeObject*)PyGAME_C_API[PYGAMEAPI_CDROM_FIRSTSLOT + 0]) | |
#define PyCD_Type (*(PyTypeObject*)PyGAME_C_API[PYGAMEAPI_CDROM_FIRSTSLOT + 0]) | |
#define PyCD_New \ | |
(*(PyObject*(*)(int))PyGAME_C_API[PYGAMEAPI_CDROM_FIRSTSLOT + 1]) | |
#define import_pygame_cd() IMPORT_PYGAME_MODULE(cdrom, CDROM) | |
#endif | |
/* JOYSTICK */ | |
#define PYGAMEAPI_JOYSTICK_FIRSTSLOT \ | |
(PYGAMEAPI_CDROM_FIRSTSLOT + PYGAMEAPI_CDROM_NUMSLOTS) | |
#define PYGAMEAPI_JOYSTICK_NUMSLOTS 2 | |
typedef struct { | |
PyObject_HEAD | |
int id; | |
} PyJoystickObject; | |
#define PyJoystick_AsID(x) (((PyJoystickObject*)x)->id) | |
#ifndef PYGAMEAPI_JOYSTICK_INTERNAL | |
#define PyJoystick_Check(x) \ | |
((x)->ob_type == (PyTypeObject*) \ | |
PyGAME_C_API[PYGAMEAPI_JOYSTICK_FIRSTSLOT + 0]) | |
#define PyJoystick_Type \ | |
(*(PyTypeObject*)PyGAME_C_API[PYGAMEAPI_JOYSTICK_FIRSTSLOT + 0]) | |
#define PyJoystick_New \ | |
(*(PyObject*(*)(int))PyGAME_C_API[PYGAMEAPI_JOYSTICK_FIRSTSLOT + 1]) | |
#define import_pygame_joystick() IMPORT_PYGAME_MODULE(joystick, JOYSTICK) | |
#endif | |
/* DISPLAY */ | |
#define PYGAMEAPI_DISPLAY_FIRSTSLOT \ | |
(PYGAMEAPI_JOYSTICK_FIRSTSLOT + PYGAMEAPI_JOYSTICK_NUMSLOTS) | |
#define PYGAMEAPI_DISPLAY_NUMSLOTS 2 | |
typedef struct { | |
PyObject_HEAD | |
SDL_VideoInfo info; | |
} PyVidInfoObject; | |
#define PyVidInfo_AsVidInfo(x) (((PyVidInfoObject*)x)->info) | |
#ifndef PYGAMEAPI_DISPLAY_INTERNAL | |
#define PyVidInfo_Check(x) \ | |
((x)->ob_type == (PyTypeObject*) \ | |
PyGAME_C_API[PYGAMEAPI_DISPLAY_FIRSTSLOT + 0]) | |
#define PyVidInfo_Type \ | |
(*(PyTypeObject*)PyGAME_C_API[PYGAMEAPI_DISPLAY_FIRSTSLOT + 0]) | |
#define PyVidInfo_New \ | |
(*(PyObject*(*)(SDL_VideoInfo*)) \ | |
PyGAME_C_API[PYGAMEAPI_DISPLAY_FIRSTSLOT + 1]) | |
#define import_pygame_display() IMPORT_PYGAME_MODULE(display, DISPLAY) | |
#endif | |
/* SURFACE */ | |
#define PYGAMEAPI_SURFACE_FIRSTSLOT \ | |
(PYGAMEAPI_DISPLAY_FIRSTSLOT + PYGAMEAPI_DISPLAY_NUMSLOTS) | |
#define PYGAMEAPI_SURFACE_NUMSLOTS 3 | |
typedef struct { | |
PyObject_HEAD | |
SDL_Surface* surf; | |
struct SubSurface_Data* subsurface; /*ptr to subsurface data (if a | |
* subsurface)*/ | |
PyObject *weakreflist; | |
PyObject *locklist; | |
PyObject *dependency; | |
} PySurfaceObject; | |
#define PySurface_AsSurface(x) (((PySurfaceObject*)x)->surf) | |
#ifndef PYGAMEAPI_SURFACE_INTERNAL | |
#define PySurface_Check(x) \ | |
((x)->ob_type == (PyTypeObject*) \ | |
PyGAME_C_API[PYGAMEAPI_SURFACE_FIRSTSLOT + 0]) | |
#define PySurface_Type \ | |
(*(PyTypeObject*)PyGAME_C_API[PYGAMEAPI_SURFACE_FIRSTSLOT + 0]) | |
#define PySurface_New \ | |
(*(PyObject*(*)(SDL_Surface*)) \ | |
PyGAME_C_API[PYGAMEAPI_SURFACE_FIRSTSLOT + 1]) | |
#define PySurface_Blit \ | |
(*(int(*)(PyObject*,PyObject*,SDL_Rect*,SDL_Rect*,int)) \ | |
PyGAME_C_API[PYGAMEAPI_SURFACE_FIRSTSLOT + 2]) | |
#define import_pygame_surface() do { \ | |
IMPORT_PYGAME_MODULE(surface, SURFACE); \ | |
if (PyErr_Occurred() != NULL) break; \ | |
IMPORT_PYGAME_MODULE(surflock, SURFLOCK); \ | |
} while (0) | |
#endif | |
/* SURFLOCK */ /*auto import/init by surface*/ | |
#define PYGAMEAPI_SURFLOCK_FIRSTSLOT \ | |
(PYGAMEAPI_SURFACE_FIRSTSLOT + PYGAMEAPI_SURFACE_NUMSLOTS) | |
#define PYGAMEAPI_SURFLOCK_NUMSLOTS 8 | |
struct SubSurface_Data | |
{ | |
PyObject* owner; | |
int pixeloffset; | |
int offsetx, offsety; | |
}; | |
typedef struct | |
{ | |
PyObject_HEAD | |
PyObject *surface; | |
PyObject *lockobj; | |
PyObject *weakrefs; | |
} PyLifetimeLock; | |
#ifndef PYGAMEAPI_SURFLOCK_INTERNAL | |
#define PyLifetimeLock_Check(x) \ | |
((x)->ob_type == (PyTypeObject*) \ | |
PyGAME_C_API[PYGAMEAPI_SURFLOCK_FIRSTSLOT + 0]) | |
#define PySurface_Prep(x) \ | |
if(((PySurfaceObject*)x)->subsurface) \ | |
(*(*(void(*)(PyObject*)) \ | |
PyGAME_C_API[PYGAMEAPI_SURFLOCK_FIRSTSLOT + 1]))(x) | |
#define PySurface_Unprep(x) \ | |
if(((PySurfaceObject*)x)->subsurface) \ | |
(*(*(void(*)(PyObject*)) \ | |
PyGAME_C_API[PYGAMEAPI_SURFLOCK_FIRSTSLOT + 2]))(x) | |
#define PySurface_Lock \ | |
(*(int(*)(PyObject*))PyGAME_C_API[PYGAMEAPI_SURFLOCK_FIRSTSLOT + 3]) | |
#define PySurface_Unlock \ | |
(*(int(*)(PyObject*))PyGAME_C_API[PYGAMEAPI_SURFLOCK_FIRSTSLOT + 4]) | |
#define PySurface_LockBy \ | |
(*(int(*)(PyObject*,PyObject*)) \ | |
PyGAME_C_API[PYGAMEAPI_SURFLOCK_FIRSTSLOT + 5]) | |
#define PySurface_UnlockBy \ | |
(*(int(*)(PyObject*,PyObject*)) \ | |
PyGAME_C_API[PYGAMEAPI_SURFLOCK_FIRSTSLOT + 6]) | |
#define PySurface_LockLifetime \ | |
(*(PyObject*(*)(PyObject*,PyObject*)) \ | |
PyGAME_C_API[PYGAMEAPI_SURFLOCK_FIRSTSLOT + 7]) | |
#endif | |
/* EVENT */ | |
#define PYGAMEAPI_EVENT_FIRSTSLOT \ | |
(PYGAMEAPI_SURFLOCK_FIRSTSLOT + PYGAMEAPI_SURFLOCK_NUMSLOTS) | |
#define PYGAMEAPI_EVENT_NUMSLOTS 4 | |
typedef struct { | |
PyObject_HEAD | |
int type; | |
PyObject* dict; | |
} PyEventObject; | |
#ifndef PYGAMEAPI_EVENT_INTERNAL | |
#define PyEvent_Check(x) \ | |
((x)->ob_type == (PyTypeObject*)PyGAME_C_API[PYGAMEAPI_EVENT_FIRSTSLOT + 0]) | |
#define PyEvent_Type \ | |
(*(PyTypeObject*)PyGAME_C_API[PYGAMEAPI_EVENT_FIRSTSLOT + 0]) | |
#define PyEvent_New \ | |
(*(PyObject*(*)(SDL_Event*))PyGAME_C_API[PYGAMEAPI_EVENT_FIRSTSLOT + 1]) | |
#define PyEvent_New2 \ | |
(*(PyObject*(*)(int, PyObject*))PyGAME_C_API[PYGAMEAPI_EVENT_FIRSTSLOT + 2]) | |
#define PyEvent_FillUserEvent \ | |
(*(int (*)(PyEventObject*, SDL_Event*)) \ | |
PyGAME_C_API[PYGAMEAPI_EVENT_FIRSTSLOT + 3]) | |
#define import_pygame_event() IMPORT_PYGAME_MODULE(event, EVENT) | |
#endif | |
/* RWOBJECT */ | |
/*the rwobject functions are only needed for C-side work; they are not accessible from Python*/
#define PYGAMEAPI_RWOBJECT_FIRSTSLOT \ | |
(PYGAMEAPI_EVENT_FIRSTSLOT + PYGAMEAPI_EVENT_NUMSLOTS) | |
#define PYGAMEAPI_RWOBJECT_NUMSLOTS 7 | |
#ifndef PYGAMEAPI_RWOBJECT_INTERNAL | |
#define RWopsFromObject \ | |
(*(SDL_RWops*(*)(PyObject*))PyGAME_C_API[PYGAMEAPI_RWOBJECT_FIRSTSLOT + 0]) | |
#define RWopsCheckObject \ | |
(*(int(*)(SDL_RWops*))PyGAME_C_API[PYGAMEAPI_RWOBJECT_FIRSTSLOT + 1]) | |
#define RWopsFromFileObjectThreaded \ | |
(*(SDL_RWops*(*)(PyObject*))PyGAME_C_API[PYGAMEAPI_RWOBJECT_FIRSTSLOT + 2]) | |
#define RWopsCheckObjectThreaded \ | |
(*(int(*)(SDL_RWops*))PyGAME_C_API[PYGAMEAPI_RWOBJECT_FIRSTSLOT + 3]) | |
#define RWopsEncodeFilePath \ | |
(*(PyObject*(*)(PyObject*, PyObject*)) \ | |
PyGAME_C_API[PYGAMEAPI_RWOBJECT_FIRSTSLOT + 4]) | |
#define RWopsEncodeString \ | |
(*(PyObject*(*)(PyObject*, const char*, const char*, PyObject*)) \ | |
PyGAME_C_API[PYGAMEAPI_RWOBJECT_FIRSTSLOT + 5]) | |
#define RWopsFromFileObject \ | |
(*(SDL_RWops*(*)(PyObject*))PyGAME_C_API[PYGAMEAPI_RWOBJECT_FIRSTSLOT + 6]) | |
#define import_pygame_rwobject() IMPORT_PYGAME_MODULE(rwobject, RWOBJECT) | |
/* For backward compatibility */ | |
#define RWopsFromPython RWopsFromObject | |
#define RWopsCheckPython RWopsCheckObject | |
#define RWopsFromPythonThreaded RWopsFromFileObjectThreaded | |
#define RWopsCheckPythonThreaded RWopsCheckObjectThreaded | |
#endif | |
/* PixelArray */ | |
#define PYGAMEAPI_PIXELARRAY_FIRSTSLOT \ | |
(PYGAMEAPI_RWOBJECT_FIRSTSLOT + PYGAMEAPI_RWOBJECT_NUMSLOTS) | |
#define PYGAMEAPI_PIXELARRAY_NUMSLOTS 2 | |
#ifndef PYGAMEAPI_PIXELARRAY_INTERNAL | |
#define PyPixelArray_Check(x) \ | |
((x)->ob_type == (PyTypeObject*) \ | |
PyGAME_C_API[PYGAMEAPI_PIXELARRAY_FIRSTSLOT + 0]) | |
#define PyPixelArray_New \ | |
(*(PyObject*(*)) PyGAME_C_API[PYGAMEAPI_PIXELARRAY_FIRSTSLOT + 1]) | |
#define import_pygame_pixelarray() IMPORT_PYGAME_MODULE(pixelarray, PIXELARRAY) | |
#endif /* PYGAMEAPI_PIXELARRAY_INTERNAL */ | |
/* Color */ | |
#define PYGAMEAPI_COLOR_FIRSTSLOT \ | |
(PYGAMEAPI_PIXELARRAY_FIRSTSLOT + PYGAMEAPI_PIXELARRAY_NUMSLOTS) | |
#define PYGAMEAPI_COLOR_NUMSLOTS 4 | |
#ifndef PYGAMEAPI_COLOR_INTERNAL | |
#define PyColor_Check(x) \ | |
((x)->ob_type == (PyTypeObject*) \ | |
PyGAME_C_API[PYGAMEAPI_COLOR_FIRSTSLOT + 0]) | |
#define PyColor_Type (*(PyObject *) PyGAME_C_API[PYGAMEAPI_COLOR_FIRSTSLOT]) | |
#define PyColor_New \ | |
(*(PyObject *(*)(Uint8*)) PyGAME_C_API[PYGAMEAPI_COLOR_FIRSTSLOT + 1]) | |
#define PyColor_NewLength \ | |
(*(PyObject *(*)(Uint8*, Uint8)) PyGAME_C_API[PYGAMEAPI_COLOR_FIRSTSLOT + 3]) | |
#define RGBAFromColorObj \ | |
(*(int(*)(PyObject*, Uint8*)) PyGAME_C_API[PYGAMEAPI_COLOR_FIRSTSLOT + 2]) | |
#define import_pygame_color() IMPORT_PYGAME_MODULE(color, COLOR) | |
#endif /* PYGAMEAPI_COLOR_INTERNAL */ | |
/* Math */ | |
#define PYGAMEAPI_MATH_FIRSTSLOT \ | |
(PYGAMEAPI_COLOR_FIRSTSLOT + PYGAMEAPI_COLOR_NUMSLOTS) | |
#define PYGAMEAPI_MATH_NUMSLOTS 2 | |
#ifndef PYGAMEAPI_MATH_INTERNAL | |
#define PyVector2_Check(x) \ | |
((x)->ob_type == (PyTypeObject*) \ | |
PyGAME_C_API[PYGAMEAPI_MATH_FIRSTSLOT + 0]) | |
#define PyVector3_Check(x) \ | |
((x)->ob_type == (PyTypeObject*) \ | |
PyGAME_C_API[PYGAMEAPI_MATH_FIRSTSLOT + 1]) | |
/* | |
#define PyVector2_New \ | |
(*(PyObject*(*)) PyGAME_C_API[PYGAMEAPI_MATH_FIRSTSLOT + 1]) | |
*/ | |
#define import_pygame_math() IMPORT_PYGAME_MODULE(math, MATH) | |
#endif /* PYGAMEAPI_MATH_INTERNAL */ | |
#define PG_CAPSULE_NAME(m) (IMPPREFIX m "." PYGAMEAPI_LOCAL_ENTRY) | |
#define _IMPORT_PYGAME_MODULE(module, MODULE, api_root) { \ | |
PyObject *_module = PyImport_ImportModule (IMPPREFIX #module); \ | |
\ | |
if (_module != NULL) { \ | |
PyObject *_c_api = \ | |
PyObject_GetAttrString (_module, PYGAMEAPI_LOCAL_ENTRY); \ | |
\ | |
Py_DECREF (_module); \ | |
if (_c_api != NULL && PyCapsule_CheckExact (_c_api)) { \ | |
void **localptr = \ | |
(void**) PyCapsule_GetPointer (_c_api, \ | |
PG_CAPSULE_NAME(#module)); \ | |
\ | |
if (localptr != NULL) { \ | |
memcpy (api_root + PYGAMEAPI_##MODULE##_FIRSTSLOT, \ | |
localptr, \ | |
sizeof(void **)*PYGAMEAPI_##MODULE##_NUMSLOTS); \ | |
} \ | |
} \ | |
Py_XDECREF(_c_api); \ | |
} \ | |
} | |
#ifndef NO_PYGAME_C_API | |
#define IMPORT_PYGAME_MODULE(module, MODULE) \ | |
_IMPORT_PYGAME_MODULE(module, MODULE, PyGAME_C_API) | |
#define PYGAMEAPI_TOTALSLOTS \ | |
(PYGAMEAPI_MATH_FIRSTSLOT + PYGAMEAPI_MATH_NUMSLOTS) | |
#ifdef PYGAME_H | |
void* PyGAME_C_API[PYGAMEAPI_TOTALSLOTS] = { NULL }; | |
#else | |
extern void* PyGAME_C_API[PYGAMEAPI_TOTALSLOTS]; | |
#endif | |
#endif | |
#if PG_HAVE_CAPSULE | |
#define encapsulate_api(ptr, module) \ | |
PyCapsule_New(ptr, PG_CAPSULE_NAME(module), NULL) | |
#else | |
#define encapsulate_api(ptr, module) \ | |
PyCObject_FromVoidPtr(ptr, NULL) | |
#endif | |
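/* Export-side sketch (illustrative only; the slot assignments below are not a
 * complete listing): the module that owns a C API fills a static slot array
 * and publishes it under PYGAMEAPI_LOCAL_ENTRY, which is exactly what
 * _IMPORT_PYGAME_MODULE() above looks up and copies on the consumer side:
 *
 *     static void *c_api[PYGAMEAPI_BASE_NUMSLOTS];
 *
 *     c_api[0] = PyExc_SDLError;
 *     c_api[1] = PyGame_RegisterQuit;
 *     // ... one entry per slot, in slot order ...
 *     PyObject *apiobj = encapsulate_api(c_api, "base");
 *     if (apiobj != NULL)
 *         PyModule_AddObject(module, PYGAMEAPI_LOCAL_ENTRY, apiobj);
 */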
/*last platform compiler stuff*/ | |
#if defined(macintosh) && defined(__MWERKS__) || defined(__SYMBIAN32__) | |
#define PYGAME_EXPORT __declspec(export) | |
#else | |
#define PYGAME_EXPORT | |
#endif | |
#if defined(__SYMBIAN32__) && PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION == 2 | |
// These are missing from Python 2.2 | |
#ifndef Py_RETURN_NONE | |
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None | |
#define Py_RETURN_TRUE return Py_INCREF(Py_True), Py_True | |
#define Py_RETURN_FALSE return Py_INCREF(Py_False), Py_False | |
#ifndef intrptr_t | |
#define intptr_t int | |
// No PySlice_GetIndicesEx on Py 2.2 | |
#define PySlice_GetIndicesEx(a,b,c,d,e,f) PySlice_GetIndices(a,b,c,d,e) | |
#define PyBool_FromLong(x) Py_BuildValue("b", x) | |
#endif | |
// _symport_free and malloc are not exported in python.dll | |
// See http://discussion.forum.nokia.com/forum/showthread.php?t=57874 | |
#undef PyObject_NEW | |
#define PyObject_NEW PyObject_New | |
#undef PyMem_MALLOC | |
#define PyMem_MALLOC PyMem_Malloc | |
#undef PyObject_DEL | |
#define PyObject_DEL PyObject_Del | |
#endif // intptr_t | |
#endif // __SYMBIAN32__ Python 2.2.2 | |
#endif /* PYGAME_H */ |
/* | |
pygame - Python Game Library | |
Copyright (C) 2000-2001 Pete Shinners | |
Copyright (C) 2007 Marcus von Appen | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free | |
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Pete Shinners | |
[email protected] | |
*/ | |
#ifndef _SURFACE_H | |
#define _SURFACE_H | |
#include "_pygame.h" | |
#include "surface.h" | |
#endif | |
/* | |
Bitmask 1.7 - A pixel-perfect collision detection library. | |
Copyright (C) 2002-2005 Ulf Ekstrom except for the bitcount | |
function which is copyright (C) Donald W. Gillies, 1992. | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free | |
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/ | |
#ifndef BITMASK_H | |
#define BITMASK_H | |
#ifdef __cplusplus | |
extern "C" { | |
#endif | |
#include <limits.h> | |
/* Define INLINE for different compilers. If your compiler does not | |
support inlining then there might be a performance hit in | |
bitmask_overlap_area(). | |
*/ | |
#ifndef INLINE | |
# ifdef __GNUC__ | |
# define INLINE inline | |
# else | |
# ifdef _MSC_VER | |
# define INLINE __inline | |
# else | |
# define INLINE | |
# endif | |
# endif | |
#endif | |
#define BITMASK_W unsigned long int | |
#define BITMASK_W_LEN (sizeof(BITMASK_W)*CHAR_BIT) | |
#define BITMASK_W_MASK (BITMASK_W_LEN - 1) | |
#define BITMASK_N(n) ((BITMASK_W)1 << (n)) | |
typedef struct bitmask | |
{ | |
int w,h; | |
BITMASK_W bits[1]; | |
} bitmask_t; | |
/* Creates a bitmask of width w and height h, where | |
w and h must both be greater than 0. | |
The mask is automatically cleared when created. | |
*/ | |
bitmask_t *bitmask_create(int w, int h); | |
/* Frees all the memory allocated by bitmask_create for m. */ | |
void bitmask_free(bitmask_t *m); | |
/* Clears all bits in the mask */ | |
void bitmask_clear(bitmask_t *m); | |
/* Sets all bits in the mask */ | |
void bitmask_fill(bitmask_t *m); | |
/* Flips all bits in the mask */ | |
void bitmask_invert(bitmask_t *m); | |
/* Counts the bits in the mask */ | |
unsigned int bitmask_count(bitmask_t *m); | |
/* Returns nonzero if the bit at (x,y) is set. Coordinates start at | |
(0,0) */ | |
static INLINE int bitmask_getbit(const bitmask_t *m, int x, int y) | |
{ | |
return (m->bits[x/BITMASK_W_LEN*m->h + y] & BITMASK_N(x & BITMASK_W_MASK)) != 0; | |
} | |
/* Sets the bit at (x,y) */ | |
static INLINE void bitmask_setbit(bitmask_t *m, int x, int y) | |
{ | |
m->bits[x/BITMASK_W_LEN*m->h + y] |= BITMASK_N(x & BITMASK_W_MASK); | |
} | |
/* Clears the bit at (x,y) */ | |
static INLINE void bitmask_clearbit(bitmask_t *m, int x, int y) | |
{ | |
m->bits[x/BITMASK_W_LEN*m->h + y] &= ~BITMASK_N(x & BITMASK_W_MASK); | |
} | |
/* Returns nonzero if the masks overlap with the given offset. | |
The overlap tests uses the following offsets (which may be negative): | |
+----+----------.. | |
|A | yoffset | |
| +-+----------.. | |
+--|B | |
|xoffset | |
| | | |
: : | |
*/ | |
int bitmask_overlap(const bitmask_t *a, const bitmask_t *b, int xoffset, int yoffset); | |
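/* Usage sketch (illustrative coordinates, not part of this header): create
   two masks, set one bit in each, and test them at an offset. Placing b at
   (8, 8) relative to a puts b's (2, 2) bit on a's (10, 10) bit:

       bitmask_t *a = bitmask_create(32, 32);
       bitmask_t *b = bitmask_create(16, 16);
       bitmask_setbit(a, 10, 10);
       bitmask_setbit(b, 2, 2);
       if (bitmask_overlap(a, b, 8, 8)) {
           // collision at offset (8, 8)
       }
       bitmask_free(b);
       bitmask_free(a);
*/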
/* Like bitmask_overlap(), but will also give a point of intersection. | |
x and y are given in the coordinates of mask a, and are untouched | |
if there is no overlap. */ | |
int bitmask_overlap_pos(const bitmask_t *a, const bitmask_t *b, | |
int xoffset, int yoffset, int *x, int *y); | |
/* Returns the number of overlapping 'pixels' */ | |
int bitmask_overlap_area(const bitmask_t *a, const bitmask_t *b, int xoffset, int yoffset); | |
/* Fills a mask with the overlap of two other masks. A bitwise AND. */ | |
void bitmask_overlap_mask (const bitmask_t *a, const bitmask_t *b, bitmask_t *c, int xoffset, int yoffset); | |
/* Draws mask b onto mask a (bitwise OR). Can be used to compose a large
mask (e.g. a game background) from several submasks, which may speed up
the overlap testing. */
void bitmask_draw(bitmask_t *a, const bitmask_t *b, int xoffset, int yoffset); | |
void bitmask_erase(bitmask_t *a, const bitmask_t *b, int xoffset, int yoffset); | |
/* Return a new scaled bitmask, with dimensions w*h. The quality of the | |
scaling may not be perfect for all circumstances, but it should | |
be reasonable. If either w or h is 0 a clear 1x1 mask is returned. */ | |
bitmask_t *bitmask_scale(const bitmask_t *m, int w, int h); | |
/* Convolve b into a, drawing the output into o, shifted by offset. If offset | |
* is 0, then the (x,y) bit will be set if and only if | |
* bitmask_overlap(a, b, x - b->w - 1, y - b->h - 1) returns true. | |
* | |
* Modifies bits o[xoffset ... xoffset + a->w + b->w - 1) | |
* [yoffset ... yoffset + a->h + b->h - 1). */ | |
void bitmask_convolve(const bitmask_t *a, const bitmask_t *b, bitmask_t *o, int xoffset, int yoffset); | |
#ifdef __cplusplus | |
} /* End of extern "C" { */ | |
#endif | |
#endif |
/* | |
pygame - Python Game Library | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free | |
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/ | |
#include "pygame.h" | |
#include "doc/camera_doc.h" | |
#if defined(__unix__) | |
#include <structmember.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <assert.h> | |
#include <fcntl.h> /* low-level i/o */ | |
#include <unistd.h> | |
#include <errno.h> | |
#include <sys/stat.h> | |
#include <sys/types.h> | |
#include <sys/time.h> | |
#include <sys/mman.h> | |
#include <sys/ioctl.h> | |
/* on freebsd there is no asm/types */ | |
#ifdef linux | |
#include <asm/types.h> /* for videodev2.h */ | |
#endif | |
#include <linux/videodev2.h> | |
#elif defined(__APPLE__) | |
#include <AvailabilityMacros.h> | |
/* We support OSX 10.6 and below. */ | |
#if __MAC_OS_X_VERSION_MAX_ALLOWED <= 1060 | |
#define PYGAME_MAC_CAMERA_OLD 1 | |
#endif | |
#endif | |
#if defined(PYGAME_MAC_CAMERA_OLD) | |
#include <QuickTime/QuickTime.h> | |
#include <QuickTime/Movies.h> | |
#include <QuickTime/ImageCompression.h> | |
#endif | |
/* some constants used here are not defined on non-v4l machines. */
#ifndef V4L2_PIX_FMT_RGB24 | |
#define V4L2_PIX_FMT_RGB24 'RGB3' | |
#endif | |
#ifndef V4L2_PIX_FMT_RGB444 | |
#define V4L2_PIX_FMT_RGB444 'R444' | |
#endif | |
#ifndef V4L2_PIX_FMT_YUYV | |
#define V4L2_PIX_FMT_YUYV 'YUYV' | |
#endif | |
#define CLEAR(x) memset (&(x), 0, sizeof (x)) | |
#define SAT(c) if (c & (~255)) { if (c < 0) c = 0; else c = 255; } | |
#define SAT2(c) ((c) & (~255) ? ((c) < 0 ? 0 : 255) : (c)) | |
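/* For example, SAT2(300) evaluates to 255, SAT2(-5) to 0 and SAT2(128) to 128;
   SAT(c) performs the same clamp to 0..255 but as a statement that modifies c. */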
#define DEFAULT_WIDTH 640 | |
#define DEFAULT_HEIGHT 480 | |
#define RGB_OUT 1 | |
#define YUV_OUT 2 | |
#define HSV_OUT 4 | |
#define CAM_V4L 1 /* deprecated. the incomplete support in pygame was removed */ | |
#define CAM_V4L2 2 | |
struct buffer { | |
void * start; | |
size_t length; | |
}; | |
#if defined(__unix__) | |
typedef struct PyCameraObject { | |
PyObject_HEAD | |
char* device_name; | |
int camera_type; | |
unsigned long pixelformat; | |
unsigned int color_out; | |
struct buffer* buffers; | |
unsigned int n_buffers; | |
int width; | |
int height; | |
int size; | |
int hflip; | |
int vflip; | |
int brightness; | |
int fd; | |
} PyCameraObject; | |
#elif defined(PYGAME_MAC_CAMERA_OLD) | |
typedef struct PyCameraObject { | |
PyObject_HEAD | |
char* device_name; /* unique name of the device */
OSType pixelformat; | |
unsigned int color_out; | |
SeqGrabComponent component; /* A type used by the Sequence Grabber API */ | |
SGChannel channel; /* Channel of the Sequence Grabber */ | |
GWorldPtr gworld; /* Pointer to the struct that holds the data of the captured image */ | |
Rect boundsRect; /* bounds of the image frame */ | |
long size; /* size of the image in our buffer to draw */ | |
int hflip; | |
int vflip; | |
short depth; | |
struct buffer pixels; | |
//struct buffer tmp_pixels /* place where the flipped image is temporarily stored if hflip or vflip is true.*/
} PyCameraObject; | |
#else | |
/* generic definition. | |
*/ | |
typedef struct PyCameraObject { | |
PyObject_HEAD | |
char* device_name; | |
int camera_type; | |
unsigned long pixelformat; | |
unsigned int color_out; | |
struct buffer* buffers; | |
unsigned int n_buffers; | |
int width; | |
int height; | |
int size; | |
int hflip; | |
int vflip; | |
int brightness; | |
int fd; | |
} PyCameraObject; | |
#endif | |
/* internal functions for colorspace conversion */ | |
void colorspace (SDL_Surface *src, SDL_Surface *dst, int cspace); | |
void rgb24_to_rgb (const void* src, void* dst, int length, SDL_PixelFormat* format); | |
void rgb444_to_rgb (const void* src, void* dst, int length, SDL_PixelFormat* format); | |
void rgb_to_yuv (const void* src, void* dst, int length, | |
unsigned long source, SDL_PixelFormat* format); | |
void rgb_to_hsv (const void* src, void* dst, int length, | |
unsigned long source, SDL_PixelFormat* format); | |
void yuyv_to_rgb (const void* src, void* dst, int length, SDL_PixelFormat* format); | |
void yuyv_to_yuv (const void* src, void* dst, int length, SDL_PixelFormat* format); | |
void sbggr8_to_rgb (const void* src, void* dst, int width, int height, | |
SDL_PixelFormat* format); | |
void yuv420_to_rgb (const void* src, void* dst, int width, int height, | |
SDL_PixelFormat* format); | |
void yuv420_to_yuv (const void* src, void* dst, int width, int height, | |
SDL_PixelFormat* format); | |
#if defined(__unix__) | |
/* internal functions specific to v4l2 */ | |
char** v4l2_list_cameras (int* num_devices); | |
int v4l2_get_control (int fd, int id, int *value); | |
int v4l2_set_control (int fd, int id, int value); | |
PyObject* v4l2_read_raw (PyCameraObject* self); | |
int v4l2_xioctl (int fd, int request, void *arg); | |
int v4l2_process_image (PyCameraObject* self, const void *image, | |
unsigned int buffer_size, SDL_Surface* surf); | |
int v4l2_query_buffer (PyCameraObject* self); | |
int v4l2_read_frame (PyCameraObject* self, SDL_Surface* surf); | |
int v4l2_stop_capturing (PyCameraObject* self); | |
int v4l2_start_capturing (PyCameraObject* self); | |
int v4l2_uninit_device (PyCameraObject* self); | |
int v4l2_init_mmap (PyCameraObject* self); | |
int v4l2_init_device (PyCameraObject* self); | |
int v4l2_close_device (PyCameraObject* self); | |
int v4l2_open_device (PyCameraObject* self); | |
#elif defined(PYGAME_MAC_CAMERA_OLD) | |
/* internal functions specific to mac */ | |
char** mac_list_cameras(int* num_devices); | |
int mac_open_device (PyCameraObject* self); | |
int mac_init_device(PyCameraObject* self); | |
int mac_close_device (PyCameraObject* self); | |
int mac_start_capturing(PyCameraObject* self); | |
int mac_stop_capturing (PyCameraObject* self); | |
int mac_get_control(PyCameraObject* self, int id, int* value); | |
int mac_set_control(PyCameraObject* self, int id, int value); | |
PyObject* mac_read_raw(PyCameraObject *self); | |
int mac_read_frame(PyCameraObject* self, SDL_Surface* surf); | |
int mac_camera_idle(PyCameraObject* self); | |
int mac_copy_gworld_to_surface(PyCameraObject* self, SDL_Surface* surf); | |
void flip_image(const void* image, void* flipped_image, int width, int height, | |
short depth, int hflip, int vflip); | |
#endif |
#ifndef _FASTEVENTS_H_ | |
#define _FASTEVENTS_H_ | |
/* | |
NET2 is a threaded, event based, network IO library for SDL. | |
Copyright (C) 2002 Bob Pendleton | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Lesser General Public License | |
as published by the Free Software Foundation; either version 2.1 | |
of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Lesser General Public License for more details. | |
You should have received a copy of the GNU Lesser General Public | |
License along with this library; if not, write to the Free | |
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
02111-1307 USA | |
If you do not wish to comply with the terms of the LGPL please | |
contact the author as other terms are available for a fee. | |
Bob Pendleton | |
[email protected] | |
*/ | |
#include "SDL.h" | |
#ifdef __cplusplus | |
extern "C" { | |
#endif | |
int FE_Init(void); // Initialize FE | |
void FE_Quit(void); // shutdown FE | |
void FE_PumpEvents(void); // replacement for SDL_PumpEvents | |
int FE_PollEvent(SDL_Event *event); // replacement for SDL_PollEvent | |
int FE_WaitEvent(SDL_Event *event); // replacement for SDL_WaitEvent | |
int FE_PushEvent(SDL_Event *event); // replacement for SDL_PushEvent | |
char *FE_GetError(void); // get the last error | |
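/* Usage sketch (not part of this header): the FE_* calls mirror their SDL
   counterparts, so a plain event loop keeps its shape. The return-value
   convention assumed here (FE_Init() == -1 on failure, FE_WaitEvent()
   returning 1 when an event arrives) follows the SDL functions they replace:

       static void event_pump(void)
       {
           SDL_Event event;
           if (FE_Init() == -1)
               return;                 // FE_GetError() describes the failure
           while (FE_WaitEvent(&event)) {
               if (event.type == SDL_QUIT)
                   break;
           }
           FE_Quit();
       }
*/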
#ifdef __cplusplus | |
} | |
#endif | |
#endif |
/* | |
pygame - Python Game Library | |
Copyright (C) 2000-2001 Pete Shinners | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free | |
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Pete Shinners | |
[email protected] | |
*/ | |
#include <Python.h> | |
#if defined(HAVE_SNPRINTF) /* also defined in SDL_ttf (SDL.h) */ | |
#undef HAVE_SNPRINTF /* remove GCC macro redefine warning */ | |
#endif | |
#include <SDL_ttf.h> | |
/* test font initialization */ | |
#define FONT_INIT_CHECK() \ | |
if(!(*(int*)PyFONT_C_API[2])) \ | |
return RAISE(PyExc_SDLError, "font system not initialized") | |
#define PYGAMEAPI_FONT_FIRSTSLOT 0 | |
#define PYGAMEAPI_FONT_NUMSLOTS 3 | |
typedef struct { | |
PyObject_HEAD | |
TTF_Font* font; | |
PyObject* weakreflist; | |
} PyFontObject; | |
#define PyFont_AsFont(x) (((PyFontObject*)x)->font) | |
#ifndef PYGAMEAPI_FONT_INTERNAL | |
#define PyFont_Check(x) ((x)->ob_type == (PyTypeObject*)PyFONT_C_API[0]) | |
#define PyFont_Type (*(PyTypeObject*)PyFONT_C_API[0]) | |
#define PyFont_New (*(PyObject*(*)(TTF_Font*))PyFONT_C_API[1]) | |
/*slot 2 taken by FONT_INIT_CHECK*/ | |
#define import_pygame_font() \ | |
_IMPORT_PYGAME_MODULE(font, FONT, PyFONT_C_API) | |
static void* PyFONT_C_API[PYGAMEAPI_FONT_NUMSLOTS] = {NULL}; | |
#endif | |
/* | |
pygame - Python Game Library | |
Copyright (C) 2009 Vicent Marti | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free | |
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/ | |
#ifndef _PYGAME_FREETYPE_H_ | |
#define _PYGAME_FREETYPE_H_ | |
#define PGFT_PYGAME1_COMPAT | |
#define HAVE_PYGAME_SDL_VIDEO | |
#define HAVE_PYGAME_SDL_RWOPS | |
#include "pygame.h" | |
#include "pgcompat.h" | |
#if PY3 | |
# define IS_PYTHON_3 | |
#endif | |
#include <ft2build.h> | |
#include FT_FREETYPE_H | |
#include FT_CACHE_H | |
#include FT_XFREE86_H | |
#include FT_TRIGONOMETRY_H | |
/********************************************************** | |
* Global module constants | |
**********************************************************/ | |
/* Render styles */ | |
#define FT_STYLE_NORMAL 0x00 | |
#define FT_STYLE_STRONG 0x01 | |
#define FT_STYLE_OBLIQUE 0x02 | |
#define FT_STYLE_UNDERLINE 0x04 | |
#define FT_STYLE_WIDE 0x08 | |
#define FT_STYLE_DEFAULT 0xFF | |
/* Bounding box modes */ | |
#define FT_BBOX_EXACT FT_GLYPH_BBOX_SUBPIXELS | |
#define FT_BBOX_EXACT_GRIDFIT FT_GLYPH_BBOX_GRIDFIT | |
#define FT_BBOX_PIXEL FT_GLYPH_BBOX_TRUNCATE | |
#define FT_BBOX_PIXEL_GRIDFIT FT_GLYPH_BBOX_PIXELS | |
/* Rendering flags */ | |
#define FT_RFLAG_NONE (0) | |
#define FT_RFLAG_ANTIALIAS (1 << 0) | |
#define FT_RFLAG_AUTOHINT (1 << 1) | |
#define FT_RFLAG_VERTICAL (1 << 2) | |
#define FT_RFLAG_HINTED (1 << 3) | |
#define FT_RFLAG_KERNING (1 << 4) | |
#define FT_RFLAG_TRANSFORM (1 << 5) | |
#define FT_RFLAG_PAD (1 << 6) | |
#define FT_RFLAG_ORIGIN (1 << 7) | |
#define FT_RFLAG_UCS4 (1 << 8) | |
#define FT_RFLAG_USE_BITMAP_STRIKES (1 << 9) | |
#define FT_RFLAG_DEFAULTS (FT_RFLAG_HINTED | \ | |
FT_RFLAG_USE_BITMAP_STRIKES | \ | |
FT_RFLAG_ANTIALIAS) | |
#define FT_RENDER_NEWBYTEARRAY 0x0 | |
#define FT_RENDER_NEWSURFACE 0x1 | |
#define FT_RENDER_EXISTINGSURFACE 0x2 | |
/********************************************************** | |
* Global module types | |
**********************************************************/ | |
typedef struct _scale_s { | |
FT_UInt x, y; | |
} Scale_t; | |
typedef FT_Angle Angle_t; | |
struct fontinternals_; | |
struct freetypeinstance_; | |
typedef struct { | |
FT_Long font_index; | |
FT_Open_Args open_args; | |
} PgFontId; | |
typedef struct { | |
PyObject_HEAD | |
PgFontId id; | |
PyObject *path; | |
int is_scalable; | |
Scale_t face_size; | |
FT_Int16 style; | |
FT_Int16 render_flags; | |
double strength; | |
double underline_adjustment; | |
FT_UInt resolution; | |
Angle_t rotation; | |
FT_Matrix transform; | |
FT_Byte fgcolor[4]; | |
struct freetypeinstance_ *freetype; /* Personal reference */ | |
struct fontinternals_ *_internals; | |
} PgFontObject; | |
#define PgFont_IS_ALIVE(o) \ | |
(((PgFontObject *)(o))->_internals != 0) | |
/********************************************************** | |
* Module declaration | |
**********************************************************/ | |
#define PYGAMEAPI_FREETYPE_FIRSTSLOT 0 | |
#define PYGAMEAPI_FREETYPE_NUMSLOTS 2 | |
#ifndef PYGAME_FREETYPE_INTERNAL | |
#define PgFont_Check(x) ((x)->ob_type == (PyTypeObject*)PgFREETYPE_C_API[0]) | |
#define PgFont_Type (*(PyTypeObject*)PgFREETYPE_C_API[1]) | |
#define PgFont_New (*(PyObject*(*)(const char*, long))PgFREETYPE_C_API[1]) | |
#define import_pygame_freetype() \ | |
_IMPORT_PYGAME_MODULE(freetype, FREETYPE, PgFREETYPE_C_API) | |
static void *PgFREETYPE_C_API[PYGAMEAPI_FREETYPE_NUMSLOTS] = {0}; | |
#endif /* PYGAME_FREETYPE_INTERNAL */ | |
#endif /* _PYGAME_FREETYPE_H_ */ |
#include <Python.h> | |
#include "bitmask.h" | |
#define PYGAMEAPI_MASK_FIRSTSLOT 0 | |
#define PYGAMEAPI_MASK_NUMSLOTS 1 | |
#define PYGAMEAPI_LOCAL_ENTRY "_PYGAME_C_API" | |
typedef struct { | |
PyObject_HEAD | |
bitmask_t *mask; | |
} PyMaskObject; | |
#define PyMask_AsBitmap(x) (((PyMaskObject*)x)->mask) | |
#ifndef PYGAMEAPI_MASK_INTERNAL | |
#define PyMask_Type (*(PyTypeObject*)PyMASK_C_API[0]) | |
#define PyMask_Check(x) ((x)->ob_type == &PyMask_Type) | |
#define import_pygame_mask() \ | |
_IMPORT_PYGAME_MODULE(mask, MASK, PyMASK_C_API) | |
static void* PyMASK_C_API[PYGAMEAPI_MASK_NUMSLOTS] = {NULL}; | |
#endif /* #ifndef PYGAMEAPI_MASK_INTERNAL */ | |
/* | |
pygame - Python Game Library | |
Copyright (C) 2000-2001 Pete Shinners | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free | |
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Pete Shinners | |
[email protected] | |
*/ | |
#include <Python.h> | |
#include <SDL_mixer.h> | |
#include <structmember.h> | |
/* test mixer initializations */ | |
#define MIXER_INIT_CHECK() \ | |
if(!SDL_WasInit(SDL_INIT_AUDIO)) \ | |
return RAISE(PyExc_SDLError, "mixer system not initialized") | |
#define PYGAMEAPI_MIXER_FIRSTSLOT 0 | |
#define PYGAMEAPI_MIXER_NUMSLOTS 7 | |
typedef struct { | |
PyObject_HEAD | |
Mix_Chunk *chunk; | |
Uint8 *mem; | |
PyObject *weakreflist; | |
} PySoundObject; | |
typedef struct { | |
PyObject_HEAD | |
int chan; | |
} PyChannelObject; | |
#define PySound_AsChunk(x) (((PySoundObject*)x)->chunk) | |
#define PyChannel_AsInt(x) (((PyChannelObject*)x)->chan) | |
#ifndef PYGAMEAPI_MIXER_INTERNAL | |
#define PySound_Check(x) ((x)->ob_type == (PyTypeObject*)PyMIXER_C_API[0]) | |
#define PySound_Type (*(PyTypeObject*)PyMIXER_C_API[0]) | |
#define PySound_New (*(PyObject*(*)(Mix_Chunk*))PyMIXER_C_API[1]) | |
#define PySound_Play (*(PyObject*(*)(PyObject*, PyObject*))PyMIXER_C_API[2]) | |
#define PyChannel_Check(x) ((x)->ob_type == (PyTypeObject*)PyMIXER_C_API[3]) | |
#define PyChannel_Type (*(PyTypeObject*)PyMIXER_C_API[3]) | |
#define PyChannel_New (*(PyObject*(*)(int))PyMIXER_C_API[4]) | |
#define PyMixer_AutoInit (*(PyObject*(*)(PyObject*, PyObject*))PyMIXER_C_API[5]) | |
#define PyMixer_AutoQuit (*(void(*)(void))PyMIXER_C_API[6]) | |
#define import_pygame_mixer() \ | |
_IMPORT_PYGAME_MODULE(mixer, MIXER, PyMIXER_C_API) | |
static void* PyMIXER_C_API[PYGAMEAPI_MIXER_NUMSLOTS] = {NULL}; | |
#endif | |
/* array structure interface version 3 declarations */ | |
#if !defined(PG_ARRAYINTER_HEADER) | |
#define PG_ARRAYINTER_HEADER | |
static const int PAI_CONTIGUOUS = 0x01; | |
static const int PAI_FORTRAN = 0x02; | |
static const int PAI_ALIGNED = 0x100; | |
static const int PAI_NOTSWAPPED = 0x200; | |
static const int PAI_WRITEABLE = 0x400; | |
static const int PAI_ARR_HAS_DESCR = 0x800; | |
typedef struct { | |
int two; /* contains the integer 2 -- simple sanity check */ | |
int nd; /* number of dimensions */ | |
char typekind; /* kind in array -- character code of typestr */ | |
int itemsize; /* size of each element */ | |
int flags; /* flags indicating how the data should be */ | |
/* interpreted */ | |
Py_intptr_t *shape; /* A length-nd array of shape information */ | |
Py_intptr_t *strides; /* A length-nd array of stride information */ | |
void *data; /* A pointer to the first element of the array */ | |
PyObject *descr; /* NULL or a data-description */ | |
} PyArrayInterface; | |
#endif |
/* | |
pygame - Python Game Library | |
Copyright (C) 2000-2001 Pete Shinners | |
Copyright (C) 2007 Rene Dudfield, Richard Goedeken | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free | |
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Pete Shinners | |
[email protected] | |
*/ | |
/* Bufferproxy module C api. | |
Depends on pygame.h being included first. | |
*/ | |
#if !defined(PG_BUFPROXY_HEADER) | |
#define PYGAMEAPI_BUFPROXY_NUMSLOTS 4 | |
#define PYGAMEAPI_BUFPROXY_FIRSTSLOT 0 | |
#if !(defined(PYGAMEAPI_BUFPROXY_INTERNAL) || defined(NO_PYGAME_C_API)) | |
static void *PgBUFPROXY_C_API[PYGAMEAPI_BUFPROXY_NUMSLOTS]; | |
typedef PyObject *(*_pgbufproxy_new_t)(PyObject *, getbufferproc); | |
typedef PyObject *(*_pgbufproxy_get_obj_t)(PyObject *); | |
typedef int (*_pgbufproxy_trip_t)(PyObject *); | |
#define PgBufproxy_Type (*(PyTypeObject*)PgBUFPROXY_C_API[0]) | |
#define PgBufproxy_New (*(_pgbufproxy_new_t)PgBUFPROXY_C_API[1]) | |
#define PgBufproxy_GetParent \ | |
(*(_pgbufproxy_get_obj_t)PgBUFPROXY_C_API[2]) | |
#define PgBufproxy_Trip (*(_pgbufproxy_trip_t)PgBUFPROXY_C_API[3]) | |
#define PgBufproxy_Check(x) ((x)->ob_type == &PgBufproxy_Type)
#define import_pygame_bufferproxy() \ | |
_IMPORT_PYGAME_MODULE(bufferproxy, BUFPROXY, PgBUFPROXY_C_API) | |
#endif /* #if !(defined(PYGAMEAPI_BUFPROXY_INTERNAL) || ... */ | |
#define PG_BUFPROXY_HEADER | |
#endif /* #if !defined(PG_BUFPROXY_HEADER) */ |
/* Python 2.x/3.x compatibility tools
*/ | |
#if !defined(PGCOMPAT_H) | |
#define PGCOMPAT_H | |
#if PY_MAJOR_VERSION >= 3 | |
#define PY3 1 | |
/* Define some aliases for the removed PyInt_* functions */ | |
#define PyInt_Check(op) PyLong_Check(op) | |
#define PyInt_FromString PyLong_FromString | |
#define PyInt_FromUnicode PyLong_FromUnicode | |
#define PyInt_FromLong PyLong_FromLong | |
#define PyInt_FromSize_t PyLong_FromSize_t | |
#define PyInt_FromSsize_t PyLong_FromSsize_t | |
#define PyInt_AsLong PyLong_AsLong | |
#define PyInt_AsSsize_t PyLong_AsSsize_t | |
#define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask | |
#define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask | |
#define PyInt_AS_LONG PyLong_AS_LONG | |
#define PyNumber_Int PyNumber_Long | |
/* Weakrefs flags changed in 3.x */ | |
#define Py_TPFLAGS_HAVE_WEAKREFS 0 | |
/* Module init function returns new module instance. */ | |
#define MODINIT_RETURN(x) return x | |
#define MODINIT_DEFINE(mod_name) PyMODINIT_FUNC PyInit_##mod_name (void) | |
#define DECREF_MOD(mod) Py_DECREF (mod) | |
/* Type header differs. */ | |
#define TYPE_HEAD(x,y) PyVarObject_HEAD_INIT(x,y) | |
/* Text interface. Use unicode strings. */ | |
#define Text_Type PyUnicode_Type | |
#define Text_Check PyUnicode_Check | |
#define Text_FromUTF8 PyUnicode_FromString | |
#define Text_FromUTF8AndSize PyUnicode_FromStringAndSize | |
#define Text_FromFormat PyUnicode_FromFormat | |
#define Text_GetSize PyUnicode_GetSize | |
#define Text_GET_SIZE PyUnicode_GET_SIZE | |
/* Binary interface. Use bytes. */ | |
#define Bytes_Type PyBytes_Type | |
#define Bytes_Check PyBytes_Check | |
#define Bytes_Size PyBytes_Size | |
#define Bytes_AsString PyBytes_AsString | |
#define Bytes_AsStringAndSize PyBytes_AsStringAndSize | |
#define Bytes_FromStringAndSize PyBytes_FromStringAndSize | |
#define Bytes_FromFormat PyBytes_FromFormat | |
#define Bytes_AS_STRING PyBytes_AS_STRING | |
#define Bytes_GET_SIZE PyBytes_GET_SIZE | |
#define Bytes_AsDecodeObject PyBytes_AsDecodedObject | |
#define Object_Unicode PyObject_Str | |
#define IsTextObj(x) (PyUnicode_Check(x) || PyBytes_Check(x)) | |
/* Renamed builtins */ | |
#define BUILTINS_MODULE "builtins" | |
#define BUILTINS_UNICODE "str" | |
#define BUILTINS_UNICHR "chr" | |
/* Defaults for unicode file path encoding */ | |
#define UNICODE_DEF_FS_CODEC Py_FileSystemDefaultEncoding | |
#if defined(MS_WIN32) | |
#define UNICODE_DEF_FS_ERROR "replace" | |
#else | |
#define UNICODE_DEF_FS_ERROR "surrogateescape" | |
#endif | |
#else /* #if PY_MAJOR_VERSION >= 3 */ | |
#define PY3 0 | |
/* Module init function returns nothing. */ | |
#define MODINIT_RETURN(x) return | |
#define MODINIT_DEFINE(mod_name) PyMODINIT_FUNC init##mod_name (void) | |
#define DECREF_MOD(mod) | |
/* Type header differs. */ | |
#define TYPE_HEAD(x,y) \ | |
PyObject_HEAD_INIT(x) \ | |
0, | |
/* Text interface. Use ascii strings. */ | |
#define Text_Type PyString_Type | |
#define Text_Check PyString_Check | |
#define Text_FromUTF8 PyString_FromString | |
#define Text_FromUTF8AndSize PyString_FromStringAndSize | |
#define Text_FromFormat PyString_FromFormat | |
#define Text_GetSize PyString_Size | 
#define Text_GET_SIZE PyString_GET_SIZE | |
/* Binary interface. Use ascii strings. */ | |
#define Bytes_Type PyString_Type | |
#define Bytes_Check PyString_Check | |
#define Bytes_Size PyString_Size | |
#define Bytes_AsString PyString_AsString | |
#define Bytes_AsStringAndSize PyString_AsStringAndSize | |
#define Bytes_FromStringAndSize PyString_FromStringAndSize | |
#define Bytes_FromFormat PyString_FromFormat | |
#define Bytes_AS_STRING PyString_AS_STRING | |
#define Bytes_GET_SIZE PyString_GET_SIZE | |
#define Bytes_AsDecodedObject PyString_AsDecodedObject | |
#define Object_Unicode PyObject_Unicode | |
/* Renamed builtins */ | |
#define BUILTINS_MODULE "__builtin__" | |
#define BUILTINS_UNICODE "unicode" | |
#define BUILTINS_UNICHR "unichr" | |
/* Defaults for unicode file path encoding */ | |
#define UNICODE_DEF_FS_CODEC Py_FileSystemDefaultEncoding | |
#define UNICODE_DEF_FS_ERROR "strict" | |
#endif /* #if PY_MAJOR_VERSION >= 3 */ | |
#define PY2 (!PY3) | |
#define MODINIT_ERROR MODINIT_RETURN (NULL) | |
/* Module state. These macros are used to define per-module macros. | |
* v - global state variable (Python 2.x) | |
* s - global state structure (Python 3.x) | |
*/ | |
#define PY2_GETSTATE(v) (&(v)) | |
#define PY3_GETSTATE(s, m) ((struct s *) PyModule_GetState (m)) | |
/* Pep 3123: Making PyObject_HEAD conform to standard C */ | |
#if !defined(Py_TYPE) | |
#define Py_TYPE(o) (((PyObject *)(o))->ob_type) | |
#define Py_REFCNT(o) (((PyObject *)(o))->ob_refcnt) | |
#define Py_SIZE(o) (((PyVarObject *)(o))->ob_size) | |
#endif | |
/* Encode a unicode file path */ | |
#define Unicode_AsEncodedPath(u) \ | |
PyUnicode_AsEncodedString ((u), UNICODE_DEF_FS_CODEC, UNICODE_DEF_FS_ERROR) | |
/* Relative paths introduced in Python 2.6 */ | |
#if PY_VERSION_HEX >= 0x02060000 | |
#define HAVE_RELATIVE_IMPORT 1 | |
#else | |
#define HAVE_RELATIVE_IMPORT 0 | |
#endif | |
#if HAVE_RELATIVE_IMPORT | |
#define RELATIVE_MODULE(m) ("." m) | |
#else | |
#define RELATIVE_MODULE(m) (m) | |
#endif | |
/* Python 3 (PEP 3118) buffer protocol */ | |
#if PY_VERSION_HEX >= 0x02060000 | |
#define HAVE_NEW_BUFPROTO 1 | |
#else | |
#define HAVE_NEW_BUFPROTO 0 | |
#endif | |
#define HAVE_OLD_BUFPROTO PY2 | |
#if !defined(PG_ENABLE_OLDBUF) /* allow for command line override */ | |
#if HAVE_OLD_BUFPROTO | |
#define PG_ENABLE_OLDBUF 1 | |
#else | |
#define PG_ENABLE_OLDBUF 0 | |
#endif | |
#endif | |
#ifndef Py_TPFLAGS_HAVE_NEWBUFFER | |
#define Py_TPFLAGS_HAVE_NEWBUFFER 0 | |
#endif | |
#ifndef Py_TPFLAGS_HAVE_CLASS | |
#define Py_TPFLAGS_HAVE_CLASS 0 | |
#endif | |
#ifndef Py_TPFLAGS_CHECKTYPES | |
#define Py_TPFLAGS_CHECKTYPES 0 | |
#endif | |
#if PY_VERSION_HEX >= 0x03020000 | |
#define Slice_GET_INDICES_EX(slice, length, start, stop, step, slicelength) \ | |
PySlice_GetIndicesEx(slice, length, start, stop, step, slicelength) | |
#else | |
#define Slice_GET_INDICES_EX(slice, length, start, stop, step, slicelength) \ | |
PySlice_GetIndicesEx((PySliceObject *)(slice), length, \ | |
start, stop, step, slicelength) | |
#endif | |
/* Python 2.4 (PEP 353) ssize_t */ | |
#if PY_VERSION_HEX < 0x02050000 | |
#define PyInt_AsSsize_t PyInt_AsLong | |
#define PyInt_FromSsize_t PyInt_FromLong | 
#endif | |
/* Support new buffer protocol? */ | |
#if !defined(PG_ENABLE_NEWBUF) /* allow for command line override */ | |
#if HAVE_NEW_BUFPROTO && !defined(PYPY_VERSION) | |
#define PG_ENABLE_NEWBUF 1 | |
#else | |
#define PG_ENABLE_NEWBUF 0 | |
#endif | |
#endif | |
#endif /* #if !defined(PGCOMPAT_H) */ |
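A minimal sketch of how these macros are typically combined so a single module source builds under both Python 2 and 3. The module name "example" is hypothetical, and the header is assumed to be included as pgcompat.h (the name pygame uses for this file).

#include <Python.h>
#include "pgcompat.h"

static PyObject *
hello (PyObject *self, PyObject *args)
{
    /* Text_FromUTF8 maps to PyUnicode_FromString on Python 3 and to
       PyString_FromString on Python 2. */
    return Text_FromUTF8 ("hello");
}

static PyMethodDef methods[] = {
    { "hello", hello, METH_NOARGS, "return a text object" },
    { NULL, NULL, 0, NULL }
};

#if PY3
static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT, "example", NULL, -1, methods,
    NULL, NULL, NULL, NULL
};
#endif

/* Expands to PyInit_example (Python 3) or initexample (Python 2). */
MODINIT_DEFINE (example)
{
    PyObject *module;
#if PY3
    module = PyModule_Create (&moduledef);
#else
    module = Py_InitModule ("example", methods);
#endif
    if (module == NULL) {
        MODINIT_ERROR;           /* returns NULL on 3.x, plain return on 2.x */
    }
    MODINIT_RETURN (module);
}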
#if !defined(PGOPENGL_H) | |
#define PGOPENGL_H | |
/** This header includes definitions of OpenGL functions as pointer types for | 
** use with the SDL function SDL_GL_GetProcAddress. | |
**/ | |
#if defined(_WIN32) | |
#define GL_APIENTRY __stdcall | |
#else | |
#define GL_APIENTRY | |
#endif | |
typedef void (GL_APIENTRY *GL_glReadPixels_Func)(int, int, int, int, unsigned int, unsigned int, void*); | |
#endif |
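A small sketch of the intended use: resolve glReadPixels at run time through SDL and call it through the typed pointer. The numeric GL constants are written out (0x1908 is GL_RGBA, 0x1401 is GL_UNSIGNED_BYTE) so the sketch does not need a GL header; the surrounding function and header name are assumptions about a typical caller.

#include <SDL.h>
#include "pgopengl.h"   /* assumed header name for the typedef above */

static int
read_front_pixels (int w, int h, void *out)
{
    GL_glReadPixels_Func p_glReadPixels =
        (GL_glReadPixels_Func) SDL_GL_GetProcAddress ("glReadPixels");
    if (p_glReadPixels == NULL) {
        return -1;   /* no GL context or symbol not found */
    }
    /* 0x1908 = GL_RGBA, 0x1401 = GL_UNSIGNED_BYTE */
    p_glReadPixels (0, 0, w, h, 0x1908, 0x1401, out);
    return 0;
}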
/* | |
pygame - Python Game Library | |
Copyright (C) 2000-2001 Pete Shinners | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free Software | 
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
Pete Shinners | |
[email protected] | |
*/ | |
/* To allow the Pygame C api to be globally shared by all code within an | |
* extension module built from multiple C files, only include the pygame.h | |
* header within the top level C file, the one which calls the | |
* 'import_pygame_*' macros. All other C source files of the module should | |
* include _pygame.h instead. | |
*/ | |
#ifndef PYGAME_H | |
#define PYGAME_H | |
#include "_pygame.h" | |
#endif |
/* | |
pygame - Python Game Library | |
Copyright (C) 2006, 2007 Rene Dudfield, Marcus von Appen | |
Originally put in the public domain by Sam Lantinga. | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free Software | 
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
*/ | |
/* This is unconditionally defined in Python.h */ | |
#if defined(_POSIX_C_SOURCE) | |
#undef _POSIX_C_SOURCE | |
#endif | |
#include <Python.h> | |
/* Handle clipboard text and data in arbitrary formats */ | |
/** | |
* Predefined supported pygame scrap types. | |
*/ | |
#define PYGAME_SCRAP_TEXT "text/plain" | |
#define PYGAME_SCRAP_BMP "image/bmp" | |
#define PYGAME_SCRAP_PPM "image/ppm" | |
#define PYGAME_SCRAP_PBM "image/pbm" | |
/** | |
* The supported scrap clipboard types. | |
* | |
 * This is only relevant in an X11 environment, which supports mouse | 
* selections as well. For Win32 and MacOS environments the default | |
* clipboard is used, no matter what value is passed. | |
*/ | |
typedef enum | |
{ | |
SCRAP_CLIPBOARD, | |
SCRAP_SELECTION /* only supported in X11 environments. */ | |
} ScrapClipType; | |
/** | |
* Macro for initialization checks. | |
*/ | |
#define PYGAME_SCRAP_INIT_CHECK() \ | |
if(!pygame_scrap_initialized()) \ | |
return (PyErr_SetString (PyExc_SDLError, \ | |
"scrap system not initialized."), NULL) | |
/** | |
 * \brief Checks whether the pygame scrap module was initialized. | 
 * | 
 * \return 1 if the module was initialized, 0 otherwise. | 
*/ | |
extern int | |
pygame_scrap_initialized (void); | |
/** | |
* \brief Initializes the pygame scrap module internals. Call this before any | |
* other method. | |
* | |
* \return 1 on successful initialization, 0 otherwise. | |
*/ | |
extern int | |
pygame_scrap_init (void); | |
/** | |
 * \brief Checks whether the pygame window has lost the clipboard focus. | 
* | |
* \return 1 if the window lost the focus, 0 otherwise. | |
*/ | |
extern int | |
pygame_scrap_lost (void); | |
/** | |
* \brief Places content of a specific type into the clipboard. | |
* | |
* \note For X11 the following notes are important: The following types | |
* are reserved for internal usage and thus will throw an error on | |
* setting them: "TIMESTAMP", "TARGETS", "SDL_SELECTION". | |
* Setting PYGAME_SCRAP_TEXT ("text/plain") will also automatically | |
* set the X11 types "STRING" (XA_STRING), "TEXT" and "UTF8_STRING". | |
* | |
* For Win32 the following notes are important: Setting | |
* PYGAME_SCRAP_TEXT ("text/plain") will also automatically set | |
* the Win32 type "TEXT" (CF_TEXT). | |
* | |
* For QNX the following notes are important: Setting | |
* PYGAME_SCRAP_TEXT ("text/plain") will also automatically set | |
* the QNX type "TEXT" (Ph_CL_TEXT). | |
* | |
* \param type The type of the content. | |
* \param srclen The length of the content. | |
* \param src The NULL terminated content. | |
 * \return 1 if the content was successfully placed in the clipboard, | 
 *         0 otherwise. | 
*/ | |
extern int | |
pygame_scrap_put (char *type, int srclen, char *src); | |
/** | |
* \brief Gets the current content from the clipboard. | |
* | |
 * \note The received content does not need to be the content previously | 
 *       placed in the clipboard using pygame_scrap_put(). See the | 
 *       pygame_scrap_put() notes for more details. | 
* | |
* \param type The type of the content to receive. | |
* \param count The size of the returned content. | |
* \return The content or NULL in case of an error or if no content of the | |
* specified type was available. | |
*/ | |
extern char* | |
pygame_scrap_get (char *type, unsigned long *count); | |
/** | |
* \brief Gets the currently available content types from the clipboard. | |
* | |
* \return The different available content types or NULL in case of an | |
* error or if no content type is available. | |
*/ | |
extern char** | |
pygame_scrap_get_types (void); | |
/** | |
* \brief Checks whether content for the specified scrap type is currently | |
* available in the clipboard. | |
* | |
* \param type The type to check for. | |
 * \return 1 if there is content, 0 otherwise. | 
*/ | |
extern int | |
pygame_scrap_contains (char *type); |
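A sketch of the call sequence the comments above describe: initialize the scrap system, place plain text in the clipboard, then read it back. Whether srclen counts the terminating NUL is an assumption (the pygame_scrap_put() docs only say the content is NULL terminated), the header name is assumed, and error handling is reduced to early returns.

#include "scrap.h"   /* assumed header name for the declarations above */

static int
copy_and_read_text (void)
{
    char *data;
    unsigned long len = 0;

    if (!pygame_scrap_initialized () && !pygame_scrap_init ())
        return 0;                       /* scrap system unavailable */

    /* "hello" plus the terminating NUL -> srclen of 6 (assumed meaning). */
    if (!pygame_scrap_put (PYGAME_SCRAP_TEXT, 6, "hello"))
        return 0;

    if (pygame_scrap_contains (PYGAME_SCRAP_TEXT)) {
        data = pygame_scrap_get (PYGAME_SCRAP_TEXT, &len);
        if (data != NULL) {
            /* data[0 .. len-1] now holds the clipboard text */
        }
    }
    return 1;
}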
/* | |
pygame - Python Game Library | |
Copyright (C) 2000-2001 Pete Shinners | |
Copyright (C) 2007 Marcus von Appen | |
This library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Library General Public | |
License as published by the Free Software Foundation; either | |
version 2 of the License, or (at your option) any later version. | |
This library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Library General Public License for more details. | |
You should have received a copy of the GNU Library General Public | |
License along with this library; if not, write to the Free Software | 
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
Pete Shinners | |
[email protected] | |
*/ | |
#ifndef SURFACE_H | |
#define SURFACE_H | |
/* This is defined in SDL.h */ | |
#if defined(_POSIX_C_SOURCE) | |
#undef _POSIX_C_SOURCE | |
#endif | |
#include <SDL.h> | |
#include "pygame.h" | |
#define PYGAME_BLEND_ADD 0x1 | |
#define PYGAME_BLEND_SUB 0x2 | |
#define PYGAME_BLEND_MULT 0x3 | |
#define PYGAME_BLEND_MIN 0x4 | |
#define PYGAME_BLEND_MAX 0x5 | |
#define PYGAME_BLEND_RGB_ADD 0x1 | |
#define PYGAME_BLEND_RGB_SUB 0x2 | |
#define PYGAME_BLEND_RGB_MULT 0x3 | |
#define PYGAME_BLEND_RGB_MIN 0x4 | |
#define PYGAME_BLEND_RGB_MAX 0x5 | |
#define PYGAME_BLEND_RGBA_ADD 0x6 | |
#define PYGAME_BLEND_RGBA_SUB 0x7 | |
#define PYGAME_BLEND_RGBA_MULT 0x8 | |
#define PYGAME_BLEND_RGBA_MIN 0x9 | |
#define PYGAME_BLEND_RGBA_MAX 0x10 | |
#define PYGAME_BLEND_PREMULTIPLIED 0x11 | |
#if SDL_BYTEORDER == SDL_LIL_ENDIAN | |
#define GET_PIXEL_24(b) (b[0] + (b[1] << 8) + (b[2] << 16)) | |
#else | |
#define GET_PIXEL_24(b) (b[2] + (b[1] << 8) + (b[0] << 16)) | |
#endif | |
#define GET_PIXEL(pxl, bpp, source) \ | |
switch (bpp) \ | |
{ \ | |
case 2: \ | |
pxl = *((Uint16 *) (source)); \ | |
break; \ | |
case 4: \ | |
pxl = *((Uint32 *) (source)); \ | |
break; \ | |
default: \ | |
{ \ | |
Uint8 *b = (Uint8 *) source; \ | |
pxl = GET_PIXEL_24(b); \ | |
} \ | |
break; \ | |
} | |
#define GET_PIXELVALS(_sR, _sG, _sB, _sA, px, fmt, ppa) \ | |
_sR = ((px & fmt->Rmask) >> fmt->Rshift); \ | |
_sR = (_sR << fmt->Rloss) + (_sR >> (8 - (fmt->Rloss << 1))); \ | |
_sG = ((px & fmt->Gmask) >> fmt->Gshift); \ | |
_sG = (_sG << fmt->Gloss) + (_sG >> (8 - (fmt->Gloss << 1))); \ | |
_sB = ((px & fmt->Bmask) >> fmt->Bshift); \ | |
_sB = (_sB << fmt->Bloss) + (_sB >> (8 - (fmt->Bloss << 1))); \ | |
if (ppa) \ | |
{ \ | |
_sA = ((px & fmt->Amask) >> fmt->Ashift); \ | |
_sA = (_sA << fmt->Aloss) + (_sA >> (8 - (fmt->Aloss << 1))); \ | |
} \ | |
else \ | |
{ \ | |
_sA = 255; \ | |
} | |
#define GET_PIXELVALS_1(sr, sg, sb, sa, _src, _fmt) \ | |
sr = _fmt->palette->colors[*((Uint8 *) (_src))].r; \ | |
sg = _fmt->palette->colors[*((Uint8 *) (_src))].g; \ | |
sb = _fmt->palette->colors[*((Uint8 *) (_src))].b; \ | |
sa = 255; | |
#if SDL_BYTEORDER == SDL_LIL_ENDIAN | |
#define SET_OFFSETS_24(or, og, ob, fmt) \ | |
{ \ | |
or = (fmt->Rshift == 0 ? 0 : \ | |
fmt->Rshift == 8 ? 1 : \ | |
2 ); \ | |
og = (fmt->Gshift == 0 ? 0 : \ | |
fmt->Gshift == 8 ? 1 : \ | |
2 ); \ | |
ob = (fmt->Bshift == 0 ? 0 : \ | |
fmt->Bshift == 8 ? 1 : \ | |
2 ); \ | |
} | |
#define SET_OFFSETS_32(or, og, ob, fmt) \ | |
{ \ | |
or = (fmt->Rshift == 0 ? 0 : \ | |
fmt->Rshift == 8 ? 1 : \ | |
fmt->Rshift == 16 ? 2 : \ | |
3 ); \ | |
og = (fmt->Gshift == 0 ? 0 : \ | |
fmt->Gshift == 8 ? 1 : \ | |
fmt->Gshift == 16 ? 2 : \ | |
3 ); \ | |
ob = (fmt->Bshift == 0 ? 0 : \ | |
fmt->Bshift == 8 ? 1 : \ | |
fmt->Bshift == 16 ? 2 : \ | |
3 ); \ | |
} | |
#else | |
#define SET_OFFSETS_24(or, og, ob, fmt) \ | |
{ \ | |
or = (fmt->Rshift == 0 ? 2 : \ | |
fmt->Rshift == 8 ? 1 : \ | |
0 ); \ | |
og = (fmt->Gshift == 0 ? 2 : \ | |
fmt->Gshift == 8 ? 1 : \ | |
0 ); \ | |
ob = (fmt->Bshift == 0 ? 2 : \ | |
fmt->Bshift == 8 ? 1 : \ | |
0 ); \ | |
} | |
#define SET_OFFSETS_32(or, og, ob, fmt) \ | |
{ \ | |
or = (fmt->Rshift == 0 ? 3 : \ | |
fmt->Rshift == 8 ? 2 : \ | |
fmt->Rshift == 16 ? 1 : \ | |
0 ); \ | |
og = (fmt->Gshift == 0 ? 3 : \ | |
fmt->Gshift == 8 ? 2 : \ | |
fmt->Gshift == 16 ? 1 : \ | |
0 ); \ | |
ob = (fmt->Bshift == 0 ? 3 : \ | |
fmt->Bshift == 8 ? 2 : \ | |
fmt->Bshift == 16 ? 1 : \ | |
0 ); \ | |
} | |
#endif | |
#define CREATE_PIXEL(buf, r, g, b, a, bp, ft) \ | |
switch (bp) \ | |
{ \ | |
case 2: \ | |
*((Uint16 *) (buf)) = \ | |
((r >> ft->Rloss) << ft->Rshift) | \ | |
((g >> ft->Gloss) << ft->Gshift) | \ | |
((b >> ft->Bloss) << ft->Bshift) | \ | |
((a >> ft->Aloss) << ft->Ashift); \ | |
break; \ | |
case 4: \ | |
*((Uint32 *) (buf)) = \ | |
((r >> ft->Rloss) << ft->Rshift) | \ | |
((g >> ft->Gloss) << ft->Gshift) | \ | |
((b >> ft->Bloss) << ft->Bshift) | \ | |
((a >> ft->Aloss) << ft->Ashift); \ | |
break; \ | |
} | |
/* Pretty good idea from Tom Duff :-). */ | |
#define LOOP_UNROLLED4(code, n, width) \ | |
n = (width + 3) / 4; \ | |
switch (width & 3) \ | |
{ \ | |
case 0: do { code; \ | |
case 3: code; \ | |
case 2: code; \ | |
case 1: code; \ | |
} while (--n > 0); \ | |
} | |
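/* Worked trace of LOOP_UNROLLED4 for width == 6: n = (6 + 3) / 4 = 2 and
 * width & 3 == 2, so execution enters the switch at "case 2" and runs the
 * code twice before reaching the while; --n then leaves n == 1, the do/while
 * loops back through "case 0" for one full pass of four, and the next --n
 * ends the loop, giving 2 + 4 == 6 executions in total.
 */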
/* Used in the srcbpp == dstbpp == 1 blend functions */ | |
#define REPEAT_3(code) \ | |
code; \ | |
code; \ | |
code; | |
#define REPEAT_4(code) \ | |
code; \ | |
code; \ | |
code; \ | |
code; | |
#define BLEND_ADD(tmp, sR, sG, sB, sA, dR, dG, dB, dA) \ | |
tmp = dR + sR; dR = (tmp <= 255 ? tmp : 255); \ | |
tmp = dG + sG; dG = (tmp <= 255 ? tmp : 255); \ | |
tmp = dB + sB; dB = (tmp <= 255 ? tmp : 255); | |
#define BLEND_SUB(tmp, sR, sG, sB, sA, dR, dG, dB, dA) \ | |
tmp = dR - sR; dR = (tmp >= 0 ? tmp : 0); \ | |
tmp = dG - sG; dG = (tmp >= 0 ? tmp : 0); \ | |
tmp = dB - sB; dB = (tmp >= 0 ? tmp : 0); | |
#define BLEND_MULT(sR, sG, sB, sA, dR, dG, dB, dA) \ | |
dR = (dR && sR) ? (dR * sR) >> 8 : 0; \ | |
dG = (dG && sG) ? (dG * sG) >> 8 : 0; \ | |
dB = (dB && sB) ? (dB * sB) >> 8 : 0; | |
#define BLEND_MIN(sR, sG, sB, sA, dR, dG, dB, dA) \ | |
if(sR < dR) { dR = sR; } \ | |
if(sG < dG) { dG = sG; } \ | |
if(sB < dB) { dB = sB; } | |
#define BLEND_MAX(sR, sG, sB, sA, dR, dG, dB, dA) \ | |
if(sR > dR) { dR = sR; } \ | |
if(sG > dG) { dG = sG; } \ | |
if(sB > dB) { dB = sB; } | |
#define BLEND_RGBA_ADD(tmp, sR, sG, sB, sA, dR, dG, dB, dA) \ | |
tmp = dR + sR; dR = (tmp <= 255 ? tmp : 255); \ | |
tmp = dG + sG; dG = (tmp <= 255 ? tmp : 255); \ | |
tmp = dB + sB; dB = (tmp <= 255 ? tmp : 255); \ | |
tmp = dA + sA; dA = (tmp <= 255 ? tmp : 255); | |
#define BLEND_RGBA_SUB(tmp, sR, sG, sB, sA, dR, dG, dB, dA) \ | |
tmp = dR - sR; dR = (tmp >= 0 ? tmp : 0); \ | |
tmp = dG - sG; dG = (tmp >= 0 ? tmp : 0); \ | |
tmp = dB - sB; dB = (tmp >= 0 ? tmp : 0); \ | |
tmp = dA - sA; dA = (tmp >= 0 ? tmp : 0); | |
#define BLEND_RGBA_MULT(sR, sG, sB, sA, dR, dG, dB, dA) \ | |
dR = (dR && sR) ? (dR * sR) >> 8 : 0; \ | |
dG = (dG && sG) ? (dG * sG) >> 8 : 0; \ | |
dB = (dB && sB) ? (dB * sB) >> 8 : 0; \ | |
dA = (dA && sA) ? (dA * sA) >> 8 : 0; | |
#define BLEND_RGBA_MIN(sR, sG, sB, sA, dR, dG, dB, dA) \ | |
if(sR < dR) { dR = sR; } \ | |
if(sG < dG) { dG = sG; } \ | |
if(sB < dB) { dB = sB; } \ | |
if(sA < dA) { dA = sA; } | |
#define BLEND_RGBA_MAX(sR, sG, sB, sA, dR, dG, dB, dA) \ | |
if(sR > dR) { dR = sR; } \ | |
if(sG > dG) { dG = sG; } \ | |
if(sB > dB) { dB = sB; } \ | |
if(sA > dA) { dA = sA; } | |
#if 1 | |
/* Choose an alpha blend equation. If the sign is preserved on a right shift | |
* then use a specialized, faster, equation. Otherwise a more general form, | |
* where all additions are done before the shift, is needed. | |
*/ | |
#if (-1 >> 1) < 0 | |
#define ALPHA_BLEND_COMP(sC, dC, sA) ((((sC - dC) * sA + sC) >> 8) + dC) | |
#else | |
#define ALPHA_BLEND_COMP(sC, dC, sA) (((dC << 8) + (sC - dC) * sA + sC) >> 8) | |
#endif | |
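/* Worked example with sC = 200, dC = 100, sA = 128, showing that both forms
 * agree when the intermediate value is non-negative:
 *   specialized: (((200 - 100) * 128 + 200) >> 8) + 100 = (13000 >> 8) + 100
 *              = 50 + 100 = 150
 *   general:     ((100 << 8) + (200 - 100) * 128 + 200) >> 8 = 38600 >> 8 = 150
 * The forms can only differ when (sC - dC) * sA is negative and the right
 * shift does not preserve the sign, which is what the test above selects on.
 */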
#define ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB, dA) \ | |
do { \ | |
if (dA) \ | |
{ \ | |
dR = ALPHA_BLEND_COMP(sR, dR, sA); \ | |
dG = ALPHA_BLEND_COMP(sG, dG, sA); \ | |
dB = ALPHA_BLEND_COMP(sB, dB, sA); \ | |
dA = sA + dA - ((sA * dA) / 255); \ | |
} \ | |
else \ | |
{ \ | |
dR = sR; \ | |
dG = sG; \ | |
dB = sB; \ | |
dA = sA; \ | |
} \ | |
} while(0) | |
#define ALPHA_BLEND_PREMULTIPLIED_COMP(sC, dC, sA) (sC + dC - ((dC * sA) >> 8)) | |
#define ALPHA_BLEND_PREMULTIPLIED(tmp, sR, sG, sB, sA, dR, dG, dB, dA) \ | |
do { \ | |
tmp = ALPHA_BLEND_PREMULTIPLIED_COMP(sR, dR, sA); dR = (tmp > 255 ? 255 : tmp); \ | |
tmp = ALPHA_BLEND_PREMULTIPLIED_COMP(sG, dG, sA); dG = (tmp > 255 ? 255 : tmp); \ | |
tmp = ALPHA_BLEND_PREMULTIPLIED_COMP(sB, dB, sA); dB = (tmp > 255 ? 255 : tmp); \ | |
dA = sA + dA - ((sA * dA) / 255); \ | |
} while(0) | |
#elif 0 | |
#define ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB, dA) \ | |
do { \ | |
if(sA){ \ | |
if(dA && sA < 255){ \ | |
int dContrib = dA*(255 - sA)/255; \ | |
dA = sA+dA - ((sA*dA)/255); \ | |
dR = (dR*dContrib + sR*sA)/dA; \ | |
dG = (dG*dContrib + sG*sA)/dA; \ | |
dB = (dB*dContrib + sB*sA)/dA; \ | |
}else{ \ | |
dR = sR; \ | |
dG = sG; \ | |
dB = sB; \ | |
dA = sA; \ | |
} \ | |
} \ | |
} while(0) | |
#endif | |
int | |
surface_fill_blend (SDL_Surface *surface, SDL_Rect *rect, Uint32 color, | |
int blendargs); | |
void | |
surface_respect_clip_rect (SDL_Surface *surface, SDL_Rect *rect); | |
int | |
pygame_AlphaBlit (SDL_Surface * src, SDL_Rect * srcrect, | |
SDL_Surface * dst, SDL_Rect * dstrect, int the_args); | |
int | |
pygame_Blit (SDL_Surface * src, SDL_Rect * srcrect, | |
SDL_Surface * dst, SDL_Rect * dstrect, int the_args); | |
#endif /* SURFACE_H */ |
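A minimal sketch of how the pixel and blend macros above compose into a per-pixel additive blend over two 32-bit surfaces. It assumes both surfaces are the same size, already locked, and carry per-pixel alpha; clipping, other depths, and the loop unrolling used by the real blitters are left out, and the included header name is an assumption.

#include <SDL.h>
#include "surface.h"   /* the header above; the file name is assumed */

static void
blend_add_32bpp (SDL_Surface *src, SDL_Surface *dst)
{
    SDL_PixelFormat *sfmt = src->format;
    SDL_PixelFormat *dfmt = dst->format;
    Uint8 *spixels = (Uint8 *) src->pixels;
    Uint8 *dpixels = (Uint8 *) dst->pixels;
    Uint32 spx, dpx, sR, sG, sB, sA, dR, dG, dB, dA;
    int x, y, tmp;

    for (y = 0; y < dst->h; ++y) {
        Uint8 *sp = spixels + y * src->pitch;
        Uint8 *dp = dpixels + y * dst->pitch;
        for (x = 0; x < dst->w; ++x, sp += 4, dp += 4) {
            GET_PIXEL (spx, 4, sp);                       /* read raw pixels */
            GET_PIXEL (dpx, 4, dp);
            GET_PIXELVALS (sR, sG, sB, sA, spx, sfmt, 1); /* unpack to 8-bit */
            GET_PIXELVALS (dR, dG, dB, dA, dpx, dfmt, 1);
            BLEND_ADD (tmp, sR, sG, sB, sA, dR, dG, dB, dA);
            CREATE_PIXEL (dp, dR, dG, dB, dA, 4, dfmt);   /* write back */
        }
    }
}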
./setuptools-28.8.0-py3.6.egg | |
./pip-9.0.1-py3.6.egg |
[console_scripts] | |
pip = pip:main | |
pip3 = pip:main | |
pip3.6 = pip:main | |
Metadata-Version: 1.2 | |
Name: pip | |
Version: 9.0.1 | |
Summary: The PyPA recommended tool for installing Python packages. | |
Home-page: https://pip.pypa.io/ | |
Author: The pip developers | |
Author-email: [email protected] | |
License: MIT | |
Description: pip | |
=== | |
The `PyPA recommended | |
<https://packaging.python.org/en/latest/current/>`_ | |
tool for installing Python packages. | |
* `Installation <https://pip.pypa.io/en/stable/installing.html>`_ | |
* `Documentation <https://pip.pypa.io/>`_ | |
* `Changelog <https://pip.pypa.io/en/stable/news.html>`_ | |
* `Github Page <https://github.com/pypa/pip>`_ | 
* `Issue Tracking <https://github.com/pypa/pip/issues>`_ | 
* `User mailing list <http://groups.google.com/group/python-virtualenv>`_ | |
* `Dev mailing list <http://groups.google.com/group/pypa-dev>`_ | |
* User IRC: #pypa on Freenode. | |
* Dev IRC: #pypa-dev on Freenode. | |
.. image:: https://img.shields.io/pypi/v/pip.svg | |
:target: https://pypi.python.org/pypi/pip | |
.. image:: https://img.shields.io/travis/pypa/pip/master.svg | |
:target: http://travis-ci.org/pypa/pip | |
.. image:: https://img.shields.io/appveyor/ci/pypa/pip.svg | |
:target: https://ci.appveyor.com/project/pypa/pip/history | |
.. image:: https://readthedocs.org/projects/pip/badge/?version=stable | |
:target: https://pip.pypa.io/en/stable | |
Code of Conduct | |
--------------- | |
Everyone interacting in the pip project's codebases, issue trackers, chat | |
rooms, and mailing lists is expected to follow the `PyPA Code of Conduct`_. | |
.. _PyPA Code of Conduct: https://www.pypa.io/en/latest/code-of-conduct/ | |
Keywords: easy_install distutils setuptools egg virtualenv | |
Platform: UNKNOWN | |
Classifier: Development Status :: 5 - Production/Stable | |
Classifier: Intended Audience :: Developers | |
Classifier: License :: OSI Approved :: MIT License | |
Classifier: Topic :: Software Development :: Build Tools | |
Classifier: Programming Language :: Python :: 2 | |
Classifier: Programming Language :: Python :: 2.6 | |
Classifier: Programming Language :: Python :: 2.7 | |
Classifier: Programming Language :: Python :: 3 | |
Classifier: Programming Language :: Python :: 3.3 | |
Classifier: Programming Language :: Python :: 3.4 | |
Classifier: Programming Language :: Python :: 3.5 | |
Classifier: Programming Language :: Python :: Implementation :: PyPy | |
Requires-Python: >=2.6,!=3.0.*,!=3.1.*,!=3.2.* |
[testing] | |
pytest | |
virtualenv>=1.10 | |
scripttest>=1.3 | |
mock | |
pretend |
AUTHORS.txt | |
CHANGES.txt | |
LICENSE.txt | |
MANIFEST.in | |
README.rst | |
setup.cfg | |
setup.py | |
docs/Makefile | |
docs/__init__.py | |
docs/conf.py | |
docs/configuration.rst | |
docs/cookbook.rst | |
docs/development.rst | |
docs/index.rst | |
docs/installing.rst | |
docs/logic.rst | |
docs/make.bat | |
docs/news.rst | |
docs/pipext.py | |
docs/quickstart.rst | |
docs/usage.rst | |
docs/user_guide.rst | |
docs/reference/index.rst | |
docs/reference/pip.rst | |
docs/reference/pip_download.rst | |
docs/reference/pip_freeze.rst | |
docs/reference/pip_hash.rst | |
docs/reference/pip_install.rst | |
docs/reference/pip_list.rst | |
docs/reference/pip_search.rst | |
docs/reference/pip_show.rst | |
docs/reference/pip_uninstall.rst | |
docs/reference/pip_wheel.rst | |
pip/__init__.py | |
pip/__main__.py | |
pip/basecommand.py | |
pip/baseparser.py | |
pip/cmdoptions.py | |
pip/download.py | |
pip/exceptions.py | |
pip/index.py | |
pip/locations.py | |
pip/pep425tags.py | |
pip/status_codes.py | |
pip/wheel.py | |
pip.egg-info/PKG-INFO | |
pip.egg-info/SOURCES.txt | |
pip.egg-info/dependency_links.txt | |
pip.egg-info/entry_points.txt | |
pip.egg-info/not-zip-safe | |
pip.egg-info/requires.txt | |
pip.egg-info/top_level.txt | |
pip/_vendor/README.rst | |
pip/_vendor/__init__.py | |
pip/_vendor/appdirs.py | |
pip/_vendor/distro.py | |
pip/_vendor/ipaddress.py | |
pip/_vendor/ordereddict.py | |
pip/_vendor/pyparsing.py | |
pip/_vendor/re-vendor.py | |
pip/_vendor/retrying.py | |
pip/_vendor/six.py | |
pip/_vendor/vendor.txt | |
pip/_vendor/cachecontrol/__init__.py | |
pip/_vendor/cachecontrol/_cmd.py | |
pip/_vendor/cachecontrol/adapter.py | |
pip/_vendor/cachecontrol/cache.py | |
pip/_vendor/cachecontrol/compat.py | |
pip/_vendor/cachecontrol/controller.py | |
pip/_vendor/cachecontrol/filewrapper.py | |
pip/_vendor/cachecontrol/heuristics.py | |
pip/_vendor/cachecontrol/serialize.py | |
pip/_vendor/cachecontrol/wrapper.py | |
pip/_vendor/cachecontrol/caches/__init__.py | |
pip/_vendor/cachecontrol/caches/file_cache.py | |
pip/_vendor/cachecontrol/caches/redis_cache.py | |
pip/_vendor/colorama/__init__.py | |
pip/_vendor/colorama/ansi.py | |
pip/_vendor/colorama/ansitowin32.py | |
pip/_vendor/colorama/initialise.py | |
pip/_vendor/colorama/win32.py | |
pip/_vendor/colorama/winterm.py | |
pip/_vendor/distlib/__init__.py | |
pip/_vendor/distlib/compat.py | |
pip/_vendor/distlib/database.py | |
pip/_vendor/distlib/index.py | |
pip/_vendor/distlib/locators.py | |
pip/_vendor/distlib/manifest.py | |
pip/_vendor/distlib/markers.py | |
pip/_vendor/distlib/metadata.py | |
pip/_vendor/distlib/resources.py | |
pip/_vendor/distlib/scripts.py | |
pip/_vendor/distlib/t32.exe | |
pip/_vendor/distlib/t64.exe | |
pip/_vendor/distlib/util.py | |
pip/_vendor/distlib/version.py | |
pip/_vendor/distlib/w32.exe | |
pip/_vendor/distlib/w64.exe | |
pip/_vendor/distlib/wheel.py | |
pip/_vendor/distlib/_backport/__init__.py | |
pip/_vendor/distlib/_backport/misc.py | |
pip/_vendor/distlib/_backport/shutil.py | |
pip/_vendor/distlib/_backport/sysconfig.cfg | |
pip/_vendor/distlib/_backport/sysconfig.py | |
pip/_vendor/distlib/_backport/tarfile.py | |
pip/_vendor/html5lib/__init__.py | |
pip/_vendor/html5lib/_ihatexml.py | |
pip/_vendor/html5lib/_inputstream.py | |
pip/_vendor/html5lib/_tokenizer.py | |
pip/_vendor/html5lib/_utils.py | |
pip/_vendor/html5lib/constants.py | |
pip/_vendor/html5lib/html5parser.py | |
pip/_vendor/html5lib/serializer.py | |
pip/_vendor/html5lib/_trie/__init__.py | |
pip/_vendor/html5lib/_trie/_base.py | |
pip/_vendor/html5lib/_trie/datrie.py | |
pip/_vendor/html5lib/_trie/py.py | |
pip/_vendor/html5lib/filters/__init__.py | |
pip/_vendor/html5lib/filters/alphabeticalattributes.py | |
pip/_vendor/html5lib/filters/base.py | |
pip/_vendor/html5lib/filters/inject_meta_charset.py | |
pip/_vendor/html5lib/filters/lint.py | |
pip/_vendor/html5lib/filters/optionaltags.py | |
pip/_vendor/html5lib/filters/sanitizer.py | |
pip/_vendor/html5lib/filters/whitespace.py | |
pip/_vendor/html5lib/treeadapters/__init__.py | |
pip/_vendor/html5lib/treeadapters/genshi.py | |
pip/_vendor/html5lib/treeadapters/sax.py | |
pip/_vendor/html5lib/treebuilders/__init__.py | |
pip/_vendor/html5lib/treebuilders/base.py | |
pip/_vendor/html5lib/treebuilders/dom.py | |
pip/_vendor/html5lib/treebuilders/etree.py | |
pip/_vendor/html5lib/treebuilders/etree_lxml.py | |
pip/_vendor/html5lib/treewalkers/__init__.py | |
pip/_vendor/html5lib/treewalkers/base.py | |
pip/_vendor/html5lib/treewalkers/dom.py | |
pip/_vendor/html5lib/treewalkers/etree.py | |
pip/_vendor/html5lib/treewalkers/etree_lxml.py | |
pip/_vendor/html5lib/treewalkers/genshi.py | |
pip/_vendor/lockfile/__init__.py | |
pip/_vendor/lockfile/linklockfile.py | |
pip/_vendor/lockfile/mkdirlockfile.py | |
pip/_vendor/lockfile/pidlockfile.py | |
pip/_vendor/lockfile/sqlitelockfile.py | |
pip/_vendor/lockfile/symlinklockfile.py | |
pip/_vendor/packaging/__about__.py | |
pip/_vendor/packaging/__init__.py | |
pip/_vendor/packaging/_compat.py | |
pip/_vendor/packaging/_structures.py | |
pip/_vendor/packaging/markers.py | |
pip/_vendor/packaging/requirements.py | |
pip/_vendor/packaging/specifiers.py | |
pip/_vendor/packaging/utils.py | |
pip/_vendor/packaging/version.py | |
pip/_vendor/pkg_resources/__init__.py | |
pip/_vendor/progress/__init__.py | |
pip/_vendor/progress/bar.py | |
pip/_vendor/progress/counter.py | |
pip/_vendor/progress/helpers.py | |
pip/_vendor/progress/spinner.py | |
pip/_vendor/requests/__init__.py | |
pip/_vendor/requests/adapters.py | |
pip/_vendor/requests/api.py | |
pip/_vendor/requests/auth.py | |
pip/_vendor/requests/cacert.pem | |
pip/_vendor/requests/certs.py | |
pip/_vendor/requests/compat.py | |
pip/_vendor/requests/cookies.py | |
pip/_vendor/requests/exceptions.py | |
pip/_vendor/requests/hooks.py | |
pip/_vendor/requests/models.py | |
pip/_vendor/requests/sessions.py | |
pip/_vendor/requests/status_codes.py | |
pip/_vendor/requests/structures.py | |
pip/_vendor/requests/utils.py | |
pip/_vendor/requests/packages/__init__.py | |
pip/_vendor/requests/packages/chardet/__init__.py | |
pip/_vendor/requests/packages/chardet/big5freq.py | |
pip/_vendor/requests/packages/chardet/big5prober.py | |
pip/_vendor/requests/packages/chardet/chardetect.py | |
pip/_vendor/requests/packages/chardet/chardistribution.py | |
pip/_vendor/requests/packages/chardet/charsetgroupprober.py | |
pip/_vendor/requests/packages/chardet/charsetprober.py | |
pip/_vendor/requests/packages/chardet/codingstatemachine.py | |
pip/_vendor/requests/packages/chardet/compat.py | |
pip/_vendor/requests/packages/chardet/constants.py | |
pip/_vendor/requests/packages/chardet/cp949prober.py | |
pip/_vendor/requests/packages/chardet/escprober.py | |
pip/_vendor/requests/packages/chardet/escsm.py | |
pip/_vendor/requests/packages/chardet/eucjpprober.py | |
pip/_vendor/requests/packages/chardet/euckrfreq.py | |
pip/_vendor/requests/packages/chardet/euckrprober.py | |
pip/_vendor/requests/packages/chardet/euctwfreq.py | |
pip/_vendor/requests/packages/chardet/euctwprober.py | |
pip/_vendor/requests/packages/chardet/gb2312freq.py | |
pip/_vendor/requests/packages/chardet/gb2312prober.py | |
pip/_vendor/requests/packages/chardet/hebrewprober.py | |
pip/_vendor/requests/packages/chardet/jisfreq.py | |
pip/_vendor/requests/packages/chardet/jpcntx.py | |
pip/_vendor/requests/packages/chardet/langbulgarianmodel.py | |
pip/_vendor/requests/packages/chardet/langcyrillicmodel.py | |
pip/_vendor/requests/packages/chardet/langgreekmodel.py | |
pip/_vendor/requests/packages/chardet/langhebrewmodel.py | |
pip/_vendor/requests/packages/chardet/langhungarianmodel.py | |
pip/_vendor/requests/packages/chardet/langthaimodel.py | |
pip/_vendor/requests/packages/chardet/latin1prober.py | |
pip/_vendor/requests/packages/chardet/mbcharsetprober.py | |
pip/_vendor/requests/packages/chardet/mbcsgroupprober.py | |
pip/_vendor/requests/packages/chardet/mbcssm.py | |
pip/_vendor/requests/packages/chardet/sbcharsetprober.py | |
pip/_vendor/requests/packages/chardet/sbcsgroupprober.py | |
pip/_vendor/requests/packages/chardet/sjisprober.py | |
pip/_vendor/requests/packages/chardet/universaldetector.py | |
pip/_vendor/requests/packages/chardet/utf8prober.py | |
pip/_vendor/requests/packages/urllib3/__init__.py | |
pip/_vendor/requests/packages/urllib3/_collections.py | |
pip/_vendor/requests/packages/urllib3/connection.py | |
pip/_vendor/requests/packages/urllib3/connectionpool.py | |
pip/_vendor/requests/packages/urllib3/exceptions.py | |
pip/_vendor/requests/packages/urllib3/fields.py | |
pip/_vendor/requests/packages/urllib3/filepost.py | |
pip/_vendor/requests/packages/urllib3/poolmanager.py | |
pip/_vendor/requests/packages/urllib3/request.py | |
pip/_vendor/requests/packages/urllib3/response.py | |
pip/_vendor/requests/packages/urllib3/contrib/__init__.py | |
pip/_vendor/requests/packages/urllib3/contrib/appengine.py | |
pip/_vendor/requests/packages/urllib3/contrib/ntlmpool.py | |
pip/_vendor/requests/packages/urllib3/contrib/pyopenssl.py | |
pip/_vendor/requests/packages/urllib3/contrib/socks.py | |
pip/_vendor/requests/packages/urllib3/packages/__init__.py | |
pip/_vendor/requests/packages/urllib3/packages/ordered_dict.py | |
pip/_vendor/requests/packages/urllib3/packages/six.py | |
pip/_vendor/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py | |
pip/_vendor/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py | |
pip/_vendor/requests/packages/urllib3/util/__init__.py | |
pip/_vendor/requests/packages/urllib3/util/connection.py | |
pip/_vendor/requests/packages/urllib3/util/request.py | |
pip/_vendor/requests/packages/urllib3/util/response.py | |
pip/_vendor/requests/packages/urllib3/util/retry.py | |
pip/_vendor/requests/packages/urllib3/util/ssl_.py | |
pip/_vendor/requests/packages/urllib3/util/timeout.py | |
pip/_vendor/requests/packages/urllib3/util/url.py | |
pip/_vendor/webencodings/__init__.py | |
pip/_vendor/webencodings/labels.py | |
pip/_vendor/webencodings/mklabels.py | |
pip/_vendor/webencodings/tests.py | |
pip/_vendor/webencodings/x_user_defined.py | |
pip/commands/__init__.py | |
pip/commands/check.py | |
pip/commands/completion.py | |
pip/commands/download.py | |
pip/commands/freeze.py | |
pip/commands/hash.py | |
pip/commands/help.py | |
pip/commands/install.py | |
pip/commands/list.py | |
pip/commands/search.py | |
pip/commands/show.py | |
pip/commands/uninstall.py | |
pip/commands/wheel.py | |
pip/compat/__init__.py | |
pip/compat/dictconfig.py | |
pip/models/__init__.py | |
pip/models/index.py | |
pip/operations/__init__.py | |
pip/operations/check.py | |
pip/operations/freeze.py | |
pip/req/__init__.py | |
pip/req/req_file.py | |
pip/req/req_install.py | |
pip/req/req_set.py | |
pip/req/req_uninstall.py | |
pip/utils/__init__.py | |
pip/utils/appdirs.py | |
pip/utils/build.py | |
pip/utils/deprecation.py | |
pip/utils/encoding.py | |
pip/utils/filesystem.py | |
pip/utils/glibc.py | |
pip/utils/hashes.py | |
pip/utils/logging.py | |
pip/utils/outdated.py | |
pip/utils/packaging.py | |
pip/utils/setuptools_build.py | |
pip/utils/ui.py | |
pip/vcs/__init__.py | |
pip/vcs/bazaar.py | |
pip/vcs/git.py | |
pip/vcs/mercurial.py | |
pip/vcs/subversion.py |
pip |
#!/usr/bin/env python | |
from __future__ import absolute_import | |
import locale | |
import logging | |
import os | |
import optparse | |
import warnings | |
import sys | |
import re | |
# 2016-06-17 [email protected]: urllib3 1.14 added optional support for socks, | |
# but if invoked (i.e. imported), it will issue a warning to stderr if socks | |
# isn't available. requests unconditionally imports urllib3's socks contrib | |
# module, triggering this warning. The warning breaks DEP-8 tests (because of | |
# the stderr output) and is just plain annoying in normal usage. I don't want | |
# to add socks as yet another dependency for pip, nor do I want to allow-stderr | 
# in the DEP-8 tests, so just suppress the warning. pdb tells me this has to | |
# be done before the import of pip.vcs. | |
from pip._vendor.requests.packages.urllib3.exceptions import DependencyWarning | |
warnings.filterwarnings("ignore", category=DependencyWarning) # noqa | |
from pip.exceptions import InstallationError, CommandError, PipError | |
from pip.utils import get_installed_distributions, get_prog | |
from pip.utils import deprecation, dist_is_editable | |
from pip.vcs import git, mercurial, subversion, bazaar # noqa | |
from pip.baseparser import ConfigOptionParser, UpdatingDefaultsHelpFormatter | |
from pip.commands import get_summaries, get_similar_commands | |
from pip.commands import commands_dict | |
from pip._vendor.requests.packages.urllib3.exceptions import ( | |
InsecureRequestWarning, | |
) | |
# assignment for flake8 to be happy | |
# This fixes a peculiarity when importing via __import__ - as we are | |
# initialising the pip module, "from pip import cmdoptions" is recursive | |
# and appears not to work properly in that situation. | |
import pip.cmdoptions | |
cmdoptions = pip.cmdoptions | |
# The version as used in the setup.py and the docs conf.py | |
__version__ = "9.0.1" | |
logger = logging.getLogger(__name__) | |
# Hide the InsecureRequestWarning from urllib3 | |
warnings.filterwarnings("ignore", category=InsecureRequestWarning) | |
def autocomplete(): | |
"""Command and option completion for the main option parser (and options) | |
and its subcommands (and options). | |
Enable by sourcing one of the completion shell scripts (bash, zsh or fish). | |
""" | |
# Don't complete if user hasn't sourced bash_completion file. | |
if 'PIP_AUTO_COMPLETE' not in os.environ: | |
return | |
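# COMP_WORDS holds the full command line being completed and COMP_CWORD the
# index of the word under the cursor; both are exported into the environment
# (along with PIP_AUTO_COMPLETE=1) by the shell function that "pip completion"
# emits. That is an assumption about the completion scripts rather than
# something visible in this file.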
cwords = os.environ['COMP_WORDS'].split()[1:] | |
cword = int(os.environ['COMP_CWORD']) | |
try: | |
current = cwords[cword - 1] | |
except IndexError: | |
current = '' | |
subcommands = [cmd for cmd, summary in get_summaries()] | |
options = [] | |
# subcommand | |
try: | |
subcommand_name = [w for w in cwords if w in subcommands][0] | |
except IndexError: | |
subcommand_name = None | |
parser = create_main_parser() | |
# subcommand options | |
if subcommand_name: | |
# special case: 'help' subcommand has no options | |
if subcommand_name == 'help': | |
sys.exit(1) | |
# special case: list locally installed dists for uninstall command | |
if subcommand_name == 'uninstall' and not current.startswith('-'): | |
installed = [] | |
lc = current.lower() | |
for dist in get_installed_distributions(local_only=True): | |
if dist.key.startswith(lc) and dist.key not in cwords[1:]: | |
installed.append(dist.key) | |
# if there are no dists installed, fall back to option completion | |
if installed: | |
for dist in installed: | |
print(dist) | |
sys.exit(1) | |
subcommand = commands_dict[subcommand_name]() | |
options += [(opt.get_opt_string(), opt.nargs) | |
for opt in subcommand.parser.option_list_all | |
if opt.help != optparse.SUPPRESS_HELP] | |
# filter out previously specified options from available options | |
prev_opts = [x.split('=')[0] for x in cwords[1:cword - 1]] | |
options = [(x, v) for (x, v) in options if x not in prev_opts] | |
# filter options by current input | |
options = [(k, v) for k, v in options if k.startswith(current)] | |
for option in options: | |
opt_label = option[0] | |
# append '=' to options which require args | |
if option[1]: | |
opt_label += '=' | |
print(opt_label) | |
else: | |
# show main parser options only when necessary | |
if current.startswith('-') or current.startswith('--'): | |
opts = [i.option_list for i in parser.option_groups] | |
opts.append(parser.option_list) | |
opts = (o for it in opts for o in it) | |
subcommands += [i.get_opt_string() for i in opts | |
if i.help != optparse.SUPPRESS_HELP] | |
print(' '.join([x for x in subcommands if x.startswith(current)])) | |
sys.exit(1) | |
def create_main_parser(): | |
parser_kw = { | |
'usage': '\n%prog <command> [options]', | |
'add_help_option': False, | |
'formatter': UpdatingDefaultsHelpFormatter(), | |
'name': 'global', | |
'prog': get_prog(), | |
} | |
parser = ConfigOptionParser(**parser_kw) | |
parser.disable_interspersed_args() | |
pip_pkg_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | |
parser.version = 'pip %s from %s (python %s)' % ( | |
__version__, pip_pkg_dir, sys.version[:3]) | |
# add the general options | |
gen_opts = cmdoptions.make_option_group(cmdoptions.general_group, parser) | |
parser.add_option_group(gen_opts) | |
parser.main = True # so the help formatter knows | |
# create command listing for description | |
command_summaries = get_summaries() | |
description = [''] + ['%-27s %s' % (i, j) for i, j in command_summaries] | |
parser.description = '\n'.join(description) | |
return parser | |
def parseopts(args): | |
parser = create_main_parser() | |
# Note: parser calls disable_interspersed_args(), so the result of this | |
# call is to split the initial args into the general options before the | |
# subcommand and everything else. | |
# For example: | |
# args: ['--timeout=5', 'install', '--user', 'INITools'] | |
# general_options: ['--timeout=5'] | 
# args_else: ['install', '--user', 'INITools'] | |
general_options, args_else = parser.parse_args(args) | |
# --version | |
if general_options.version: | |
sys.stdout.write(parser.version) | |
sys.stdout.write(os.linesep) | |
sys.exit() | |
# pip || pip help -> print_help() | |
if not args_else or (args_else[0] == 'help' and len(args_else) == 1): | |
parser.print_help() | |
sys.exit() | |
# the subcommand name | |
cmd_name = args_else[0] | |
if cmd_name not in commands_dict: | |
guess = get_similar_commands(cmd_name) | |
msg = ['unknown command "%s"' % cmd_name] | |
if guess: | |
msg.append('maybe you meant "%s"' % guess) | |
raise CommandError(' - '.join(msg)) | |
# all the args without the subcommand | |
cmd_args = args[:] | |
cmd_args.remove(cmd_name) | |
return cmd_name, cmd_args | |
def check_isolated(args): | |
isolated = False | |
if "--isolated" in args: | |
isolated = True | |
return isolated | |
def main(args=None): | |
if args is None: | |
args = sys.argv[1:] | |
# Configure our deprecation warnings to be sent through loggers | |
deprecation.install_warning_logger() | |
autocomplete() | |
try: | |
cmd_name, cmd_args = parseopts(args) | |
except PipError as exc: | |
sys.stderr.write("ERROR: %s" % exc) | |
sys.stderr.write(os.linesep) | |
sys.exit(1) | |
# Needed for locale.getpreferredencoding(False) to work | |
# in pip.utils.encoding.auto_decode | |
try: | |
locale.setlocale(locale.LC_ALL, '') | |
except locale.Error as e: | |
# setlocale can apparently crash if locales are uninitialized | 
logger.debug("Ignoring error %s when setting locale", e) | |
command = commands_dict[cmd_name](isolated=check_isolated(cmd_args)) | |
return command.main(cmd_args) | |
# ########################################################### | |
# # Writing freeze files | |
class FrozenRequirement(object): | |
def __init__(self, name, req, editable, comments=()): | |
self.name = name | |
self.req = req | |
self.editable = editable | |
self.comments = comments | |
_rev_re = re.compile(r'-r(\d+)$') | |
_date_re = re.compile(r'-(20\d\d\d\d\d\d)$') | |
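# These patterns recognise version strings carrying an svn revision
# ("1.0-r1234") or snapshot date ("1.0-20130401") suffix; from_dist() below
# uses a match to reconstruct an editable svn requirement. The concrete
# example versions here are illustrative only.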
@classmethod | |
def from_dist(cls, dist, dependency_links): | |
location = os.path.normcase(os.path.abspath(dist.location)) | |
comments = [] | |
from pip.vcs import vcs, get_src_requirement | |
if dist_is_editable(dist) and vcs.get_backend_name(location): | |
editable = True | |
try: | |
req = get_src_requirement(dist, location) | |
except InstallationError as exc: | |
logger.warning( | |
"Error when trying to get requirement for VCS system %s, " | |
"falling back to uneditable format", exc | |
) | |
req = None | |
if req is None: | |
logger.warning( | |
'Could not determine repository location of %s', location | |
) | |
comments.append( | |
'## !! Could not determine repository location' | |
) | |
req = dist.as_requirement() | |
editable = False | |
else: | |
editable = False | |
req = dist.as_requirement() | |
specs = req.specs | |
assert len(specs) == 1 and specs[0][0] in ["==", "==="], \ | |
'Expected 1 spec with == or ===; specs = %r; dist = %r' % \ | |
(specs, dist) | |
version = specs[0][1] | |
ver_match = cls._rev_re.search(version) | |
date_match = cls._date_re.search(version) | |
if ver_match or date_match: | |
svn_backend = vcs.get_backend('svn') | |
if svn_backend: | |
svn_location = svn_backend().get_location( | |
dist, | |
dependency_links, | |
) | |
if not svn_location: | |
logger.warning( | |
'Warning: cannot find svn location for %s', req) | |
comments.append( | |
'## FIXME: could not find svn URL in dependency_links ' | |
'for this package:' | |
) | |
else: | |
comments.append( | |
'# Installing as editable to satisfy requirement %s:' % | |
req | |
) | |
if ver_match: | |
rev = ver_match.group(1) | |
else: | |
rev = '{%s}' % date_match.group(1) | |
editable = True | |
req = '%s@%s#egg=%s' % ( | |
svn_location, | |
rev, | |
cls.egg_name(dist) | |
) | |
return cls(dist.project_name, req, editable, comments) | |
@staticmethod | |
def egg_name(dist): | |
name = dist.egg_name() | |
match = re.search(r'-py\d\.\d$', name) | |
if match: | |
name = name[:match.start()] | |
return name | |
def __str__(self): | |
req = self.req | |
if self.editable: | |
req = '-e %s' % req | |
return '\n'.join(list(self.comments) + [str(req)]) + '\n' | |
if __name__ == '__main__': | |
sys.exit(main()) |
from __future__ import absolute_import | |
import os | |
import sys | |
# If we are running from a wheel, add the wheel to sys.path | |
# This allows the usage python pip-*.whl/pip install pip-*.whl | |
if __package__ == '': | |
# __file__ is pip-*.whl/pip/__main__.py | |
# first dirname call strips off '/__main__.py', second strips off '/pip' | 
# Resulting path is the name of the wheel itself | |
# Add that to sys.path so we can import pip | |
path = os.path.dirname(os.path.dirname(__file__)) | |
sys.path.insert(0, path) | |
import pip # noqa | |
if __name__ == '__main__': | |
sys.exit(pip.main()) |
""" | |
pip._vendor is for vendoring dependencies of pip to prevent needing pip to | |
depend on something external. | |
Files inside of pip._vendor should be considered immutable and should only be | |
updated to versions from upstream. | |
""" | |
from __future__ import absolute_import | |
import glob | |
import os.path | |
import sys | |
# Downstream redistributors which have debundled our dependencies should also | |
# patch this value to be true. This will trigger the additional patching | |
# to cause things like "six" to be available as pip._vendor.six. | 
DEBUNDLED = False | |
# By default, look in this directory for a bunch of .whl files which we will | |
# add to the beginning of sys.path before attempting to import anything. This | |
# is done to support downstream re-distributors like Debian and Fedora who | |
# wish to create their own Wheels for our dependencies to aid in debundling. | |
WHEEL_DIR = os.path.abspath(os.path.dirname(__file__)) | |
# Define a small helper function to alias our vendored modules to the real ones | |
# if the vendored ones do not exist. The idea for this was taken from | 
# https://github.com/kennethreitz/requests/pull/2567. | 
def vendored(modulename): | |
vendored_name = "{0}.{1}".format(__name__, modulename) | |
try: | |
__import__(vendored_name, globals(), locals(), level=0) | |
except ImportError: | |
try: | |
__import__(modulename, globals(), locals(), level=0) | |
except ImportError: | |
# We can just silently allow import failures to pass here. If we | |
# got to this point it means that ``import pip._vendor.whatever`` | |
# failed and so did ``import whatever``. Since we're importing this | |
# upfront in an attempt to alias imports, not erroring here will | |
# just mean we get a regular import error whenever pip *actually* | |
# tries to import one of these modules to use it, which actually | |
# gives us a better error message than we would have otherwise | |
# gotten. | |
pass | |
else: | |
sys.modules[vendored_name] = sys.modules[modulename] | |
base, head = vendored_name.rsplit(".", 1) | |
setattr(sys.modules[base], head, sys.modules[modulename]) | |
# If we're operating in a debundled setup, then we want to go ahead and trigger | |
# the aliasing of our vendored libraries as well as looking for wheels to add | |
# to our sys.path. This will cause all of this code to be a no-op typically | |
# however downstream redistributors can enable it in a consistent way across | |
# all platforms. | |
if DEBUNDLED: | |
# Actually look inside of WHEEL_DIR to find .whl files and add them to the | |
# front of our sys.path. | |
sys.path[:] = glob.glob(os.path.join(WHEEL_DIR, "*.whl")) + sys.path | |
# Actually alias all of our vendored dependencies. | |
vendored("cachecontrol") | |
vendored("colorama") | |
vendored("distlib") | |
vendored("distro") | |
vendored("html5lib") | |
vendored("lockfile") | |
vendored("six") | |
vendored("six.moves") | |
vendored("six.moves.urllib") | |
vendored("packaging") | |
vendored("packaging.version") | |
vendored("packaging.specifiers") | |
vendored("pkg_resources") | |
vendored("progress") | |
vendored("retrying") | |
vendored("requests") | |
vendored("requests.packages") | |
vendored("requests.packages.urllib3") | |
vendored("requests.packages.urllib3._collections") | |
vendored("requests.packages.urllib3.connection") | |
vendored("requests.packages.urllib3.connectionpool") | |
vendored("requests.packages.urllib3.contrib") | |
vendored("requests.packages.urllib3.contrib.ntlmpool") | |
vendored("requests.packages.urllib3.contrib.pyopenssl") | |
vendored("requests.packages.urllib3.exceptions") | |
vendored("requests.packages.urllib3.fields") | |
vendored("requests.packages.urllib3.filepost") | |
vendored("requests.packages.urllib3.packages") | |
vendored("requests.packages.urllib3.packages.ordered_dict") | |
vendored("requests.packages.urllib3.packages.six") | |
vendored("requests.packages.urllib3.packages.ssl_match_hostname") | |
vendored("requests.packages.urllib3.packages.ssl_match_hostname." | |
"_implementation") | |
vendored("requests.packages.urllib3.poolmanager") | |
vendored("requests.packages.urllib3.request") | |
vendored("requests.packages.urllib3.response") | |
vendored("requests.packages.urllib3.util") | |
vendored("requests.packages.urllib3.util.connection") | |
vendored("requests.packages.urllib3.util.request") | |
vendored("requests.packages.urllib3.util.response") | |
vendored("requests.packages.urllib3.util.retry") | |
vendored("requests.packages.urllib3.util.ssl_") | |
vendored("requests.packages.urllib3.util.timeout") | |
vendored("requests.packages.urllib3.util.url") |
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Copyright (c) 2005-2010 ActiveState Software Inc. | |
# Copyright (c) 2013 Eddy Petrișor | |
"""Utilities for determining application-specific dirs. | |
See <http://github.com/ActiveState/appdirs> for details and usage. | 
""" | |
# Dev Notes: | |
# - MSDN on where to store app data files: | |
# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120 | |
# - macOS: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html | |
# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html | |
__version_info__ = (1, 4, 0) | |
__version__ = '.'.join(map(str, __version_info__)) | |
import sys | |
import os | |
PY3 = sys.version_info[0] == 3 | |
if PY3: | |
unicode = str | |
if sys.platform.startswith('java'): | |
import platform | |
os_name = platform.java_ver()[3][0] | |
if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc. | |
system = 'win32' | |
elif os_name.startswith('Mac'): # "macOS", etc. | |
system = 'darwin' | |
else: # "Linux", "SunOS", "FreeBSD", etc. | |
# Setting this to "linux2" is not ideal, but only Windows or Mac | |
# are actually checked for and the rest of the module expects | |
# *sys.platform* style strings. | |
system = 'linux2' | |
else: | |
system = sys.platform | |
def user_data_dir(appname=None, appauthor=None, version=None, roaming=False): | |
r"""Return full path to the user-specific data dir for this application. | |
"appname" is the name of application. | |
If None, just the system directory is returned. | |
"appauthor" (only used on Windows) is the name of the | |
appauthor or distributing body for this application. Typically | |
it is the owning company name. This falls back to appname. You may | |
pass False to disable it. | |
"version" is an optional version path element to append to the | |
path. You might want to use this if you want multiple versions | |
of your app to be able to run independently. If used, this | |
would typically be "<major>.<minor>". | |
Only applied when appname is present. | |
"roaming" (boolean, default False) can be set True to use the Windows | |
roaming appdata directory. That means that for users on a Windows | |
network setup for roaming profiles, this user data will be | |
sync'd on login. See | |
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx> | |
for a discussion of issues. | |
Typical user data directories are: | |
macOS: ~/Library/Application Support/<AppName> | |
Unix: ~/.local/share/<AppName> # or in $XDG_DATA_HOME, if defined | |
Win XP (not roaming): C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName> | |
Win XP (roaming): C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName> | |
Win 7 (not roaming): C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName> | |
Win 7 (roaming): C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName> | |
For Unix, we follow the XDG spec and support $XDG_DATA_HOME. | |
That means, by default "~/.local/share/<AppName>". | |
""" | |
if system == "win32": | |
if appauthor is None: | |
appauthor = appname | |
const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA" | |
path = os.path.normpath(_get_win_folder(const)) | |
if appname: | |
if appauthor is not False: | |
path = os.path.join(path, appauthor, appname) | |
else: | |
path = os.path.join(path, appname) | |
elif system == 'darwin': | |
path = os.path.expanduser('~/Library/Application Support/') | |
if appname: | |
path = os.path.join(path, appname) | |
else: | |
path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share")) | |
if appname: | |
path = os.path.join(path, appname) | |
if appname and version: | |
path = os.path.join(path, version) | |
return path | |
def site_data_dir(appname=None, appauthor=None, version=None, multipath=False): | |
"""Return full path to the user-shared data dir for this application. | |
"appname" is the name of application. | |
If None, just the system directory is returned. | |
"appauthor" (only used on Windows) is the name of the | |
appauthor or distributing body for this application. Typically | |
it is the owning company name. This falls back to appname. You may | |
pass False to disable it. | |
"version" is an optional version path element to append to the | |
path. You might want to use this if you want multiple versions | |
of your app to be able to run independently. If used, this | |
would typically be "<major>.<minor>". | |
Only applied when appname is present. | |
"multipath" is an optional parameter only applicable to *nix | |
which indicates that the entire list of data dirs should be | |
returned. By default, the first item from XDG_DATA_DIRS is | |
returned, or '/usr/local/share/<AppName>', | |
if XDG_DATA_DIRS is not set | |
Typical user data directories are: | |
macOS: /Library/Application Support/<AppName> | |
Unix: /usr/local/share/<AppName> or /usr/share/<AppName> | |
Win XP: C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName> | |
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) | |
Win 7: C:\ProgramData\<AppAuthor>\<AppName> # Hidden, but writeable on Win 7. | |
For Unix, this is using the $XDG_DATA_DIRS[0] default. | |
WARNING: Do not use this on Windows. See the Vista-Fail note above for why. | |
""" | |
if system == "win32": | |
if appauthor is None: | |
appauthor = appname | |
path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA")) | |
if appname: | |
if appauthor is not False: | |
path = os.path.join(path, appauthor, appname) | |
else: | |
path = os.path.join(path, appname) | |
elif system == 'darwin': | |
path = os.path.expanduser('/Library/Application Support') | |
if appname: | |
path = os.path.join(path, appname) | |
else: | |
# XDG default for $XDG_DATA_DIRS | |
# only first, if multipath is False | |
path = os.getenv('XDG_DATA_DIRS', | |
os.pathsep.join(['/usr/local/share', '/usr/share'])) | |
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)] | |
if appname: | |
if version: | |
appname = os.path.join(appname, version) | |
pathlist = [os.sep.join([x, appname]) for x in pathlist] | |
if multipath: | |
path = os.pathsep.join(pathlist) | |
else: | |
path = pathlist[0] | |
return path | |
if appname and version: | |
path = os.path.join(path, version) | |
return path | |
def user_config_dir(appname=None, appauthor=None, version=None, roaming=False): | |
r"""Return full path to the user-specific config dir for this application. | |
"appname" is the name of application. | |
If None, just the system directory is returned. | |
"appauthor" (only used on Windows) is the name of the | |
appauthor or distributing body for this application. Typically | |
it is the owning company name. This falls back to appname. You may | |
pass False to disable it. | |
"version" is an optional version path element to append to the | |
path. You might want to use this if you want multiple versions | |
of your app to be able to run independently. If used, this | |
would typically be "<major>.<minor>". | |
Only applied when appname is present. | |
"roaming" (boolean, default False) can be set True to use the Windows | |
roaming appdata directory. That means that for users on a Windows | |
network setup for roaming profiles, this user data will be | |
sync'd on login. See | |
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx> | |
for a discussion of issues. | |
Typical user config directories are:
macOS: same as user_data_dir | |
Unix: ~/.config/<AppName> # or in $XDG_CONFIG_HOME, if defined | |
Win *: same as user_data_dir | |
For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME. | |
That means, by default "~/.config/<AppName>".
""" | |
if system in ["win32", "darwin"]: | |
path = user_data_dir(appname, appauthor, None, roaming) | |
else: | |
path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config")) | |
if appname: | |
path = os.path.join(path, appname) | |
if appname and version: | |
path = os.path.join(path, version) | |
return path | |
def site_config_dir(appname=None, appauthor=None, version=None, multipath=False): | |
"""Return full path to the user-shared data dir for this application. | |
"appname" is the name of application. | |
If None, just the system directory is returned. | |
"appauthor" (only used on Windows) is the name of the | |
appauthor or distributing body for this application. Typically | |
it is the owning company name. This falls back to appname. You may | |
pass False to disable it. | |
"version" is an optional version path element to append to the | |
path. You might want to use this if you want multiple versions | |
of your app to be able to run independently. If used, this | |
would typically be "<major>.<minor>". | |
Only applied when appname is present. | |
"multipath" is an optional parameter only applicable to *nix | |
which indicates that the entire list of config dirs should be | |
returned. By default, the first item from XDG_CONFIG_DIRS is | |
returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set | |
Typical site config directories are:
macOS: same as site_data_dir | |
Unix: /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in | |
$XDG_CONFIG_DIRS | |
Win *: same as site_data_dir | |
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) | |
For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False | |
WARNING: Do not use this on Windows. See the Vista-Fail note above for why. | |
""" | |
if system in ["win32", "darwin"]: | |
path = site_data_dir(appname, appauthor) | |
if appname and version: | |
path = os.path.join(path, version) | |
else: | |
# XDG default for $XDG_CONFIG_DIRS | |
# only first, if multipath is False | |
path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg') | |
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)] | |
if appname: | |
if version: | |
appname = os.path.join(appname, version) | |
pathlist = [os.sep.join([x, appname]) for x in pathlist] | |
if multipath: | |
path = os.pathsep.join(pathlist) | |
else: | |
path = pathlist[0] | |
return path | |
def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True): | |
r"""Return full path to the user-specific cache dir for this application. | |
"appname" is the name of application. | |
If None, just the system directory is returned. | |
"appauthor" (only used on Windows) is the name of the | |
appauthor or distributing body for this application. Typically | |
it is the owning company name. This falls back to appname. You may | |
pass False to disable it. | |
"version" is an optional version path element to append to the | |
path. You might want to use this if you want multiple versions | |
of your app to be able to run independently. If used, this | |
would typically be "<major>.<minor>". | |
Only applied when appname is present. | |
"opinion" (boolean) can be False to disable the appending of | |
"Cache" to the base app data dir for Windows. See | |
discussion below. | |
Typical user cache directories are: | |
macOS: ~/Library/Caches/<AppName> | |
Unix: ~/.cache/<AppName> (XDG default) | |
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache | |
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache | |
On Windows the only suggestion in the MSDN docs is that local settings go in | |
the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming | |
app data dir (the default returned by `user_data_dir` above). Apps typically | |
put cache data somewhere *under* the given dir here. Some examples: | |
...\Mozilla\Firefox\Profiles\<ProfileName>\Cache | |
...\Acme\SuperApp\Cache\1.0 | |
OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value. | |
This can be disabled with the `opinion=False` option. | |
""" | |
if system == "win32": | |
if appauthor is None: | |
appauthor = appname | |
path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA")) | |
if appname: | |
if appauthor is not False: | |
path = os.path.join(path, appauthor, appname) | |
else: | |
path = os.path.join(path, appname) | |
if opinion: | |
path = os.path.join(path, "Cache") | |
elif system == 'darwin': | |
path = os.path.expanduser('~/Library/Caches') | |
if appname: | |
path = os.path.join(path, appname) | |
else: | |
path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) | |
if appname: | |
path = os.path.join(path, appname) | |
if appname and version: | |
path = os.path.join(path, version) | |
return path | |
def user_log_dir(appname=None, appauthor=None, version=None, opinion=True): | |
r"""Return full path to the user-specific log dir for this application. | |
"appname" is the name of application. | |
If None, just the system directory is returned. | |
"appauthor" (only used on Windows) is the name of the | |
appauthor or distributing body for this application. Typically | |
it is the owning company name. This falls back to appname. You may | |
pass False to disable it. | |
"version" is an optional version path element to append to the | |
path. You might want to use this if you want multiple versions | |
of your app to be able to run independently. If used, this | |
would typically be "<major>.<minor>". | |
Only applied when appname is present. | |
"opinion" (boolean) can be False to disable the appending of | |
"Logs" to the base app data dir for Windows, and "log" to the | |
base cache dir for Unix. See discussion below. | |
Typical user log directories are:
macOS: ~/Library/Logs/<AppName> | |
Unix: ~/.cache/<AppName>/log # or under $XDG_CACHE_HOME if defined | |
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs | |
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs | |
On Windows the only suggestion in the MSDN docs is that local settings | |
go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in | |
examples of what some windows apps use for a logs dir.) | |
OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA` | |
value for Windows and appends "log" to the user cache dir for Unix. | |
This can be disabled with the `opinion=False` option. | |
""" | |
if system == "darwin": | |
path = os.path.join( | |
os.path.expanduser('~/Library/Logs'), | |
appname) | |
elif system == "win32": | |
path = user_data_dir(appname, appauthor, version) | |
version = False | |
if opinion: | |
path = os.path.join(path, "Logs") | |
else: | |
path = user_cache_dir(appname, appauthor, version) | |
version = False | |
if opinion: | |
path = os.path.join(path, "log") | |
if appname and version: | |
path = os.path.join(path, version) | |
return path | |
class AppDirs(object): | |
"""Convenience wrapper for getting application dirs.""" | |
def __init__(self, appname, appauthor=None, version=None, roaming=False, | |
multipath=False): | |
self.appname = appname | |
self.appauthor = appauthor | |
self.version = version | |
self.roaming = roaming | |
self.multipath = multipath | |
@property | |
def user_data_dir(self): | |
return user_data_dir(self.appname, self.appauthor, | |
version=self.version, roaming=self.roaming) | |
@property | |
def site_data_dir(self): | |
return site_data_dir(self.appname, self.appauthor, | |
version=self.version, multipath=self.multipath) | |
@property | |
def user_config_dir(self): | |
return user_config_dir(self.appname, self.appauthor, | |
version=self.version, roaming=self.roaming) | |
@property | |
def site_config_dir(self): | |
return site_config_dir(self.appname, self.appauthor, | |
version=self.version, multipath=self.multipath) | |
@property | |
def user_cache_dir(self): | |
return user_cache_dir(self.appname, self.appauthor, | |
version=self.version) | |
@property | |
def user_log_dir(self): | |
return user_log_dir(self.appname, self.appauthor, | |
version=self.version) | |
#---- internal support stuff | |
def _get_win_folder_from_registry(csidl_name): | |
"""This is a fallback technique at best. I'm not sure if using the | |
registry for this guarantees us the correct answer for all CSIDL_* | |
names. | |
""" | |
import _winreg | |
shell_folder_name = { | |
"CSIDL_APPDATA": "AppData", | |
"CSIDL_COMMON_APPDATA": "Common AppData", | |
"CSIDL_LOCAL_APPDATA": "Local AppData", | |
}[csidl_name] | |
key = _winreg.OpenKey( | |
_winreg.HKEY_CURRENT_USER, | |
r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders" | |
) | |
dir, type = _winreg.QueryValueEx(key, shell_folder_name) | |
return dir | |
def _get_win_folder_with_pywin32(csidl_name): | |
from win32com.shell import shellcon, shell | |
dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0) | |
# Try to make this a unicode path because SHGetFolderPath does | |
# not return unicode strings when there is unicode data in the | |
# path. | |
try: | |
dir = unicode(dir) | |
# Downgrade to short path name if have highbit chars. See | |
# <http://bugs.activestate.com/show_bug.cgi?id=85099>. | |
has_high_char = False | |
for c in dir: | |
if ord(c) > 255: | |
has_high_char = True | |
break | |
if has_high_char: | |
try: | |
import win32api | |
dir = win32api.GetShortPathName(dir) | |
except ImportError: | |
pass | |
except UnicodeError: | |
pass | |
return dir | |
def _get_win_folder_with_ctypes(csidl_name): | |
import ctypes | |
csidl_const = { | |
"CSIDL_APPDATA": 26, | |
"CSIDL_COMMON_APPDATA": 35, | |
"CSIDL_LOCAL_APPDATA": 28, | |
}[csidl_name] | |
buf = ctypes.create_unicode_buffer(1024) | |
ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf) | |
# Downgrade to short path name if have highbit chars. See | |
# <http://bugs.activestate.com/show_bug.cgi?id=85099>. | |
has_high_char = False | |
for c in buf: | |
if ord(c) > 255: | |
has_high_char = True | |
break | |
if has_high_char: | |
buf2 = ctypes.create_unicode_buffer(1024) | |
if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024): | |
buf = buf2 | |
return buf.value | |
def _get_win_folder_with_jna(csidl_name): | |
import array | |
from com.sun import jna | |
from com.sun.jna.platform import win32 | |
buf_size = win32.WinDef.MAX_PATH * 2 | |
buf = array.zeros('c', buf_size) | |
shell = win32.Shell32.INSTANCE | |
shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf) | |
dir = jna.Native.toString(buf.tostring()).rstrip("\0") | |
# Downgrade to short path name if have highbit chars. See | |
# <http://bugs.activestate.com/show_bug.cgi?id=85099>. | |
has_high_char = False | |
for c in dir: | |
if ord(c) > 255: | |
has_high_char = True | |
break | |
if has_high_char: | |
buf = array.zeros('c', buf_size) | |
kernel = win32.Kernel32.INSTANCE | |
if kernel.GetShortPathName(dir, buf, buf_size):
dir = jna.Native.toString(buf.tostring()).rstrip("\0") | |
return dir | |
if system == "win32": | |
try: | |
import win32com.shell | |
_get_win_folder = _get_win_folder_with_pywin32 | |
except ImportError: | |
try: | |
from ctypes import windll | |
_get_win_folder = _get_win_folder_with_ctypes | |
except ImportError: | |
try: | |
import com.sun.jna | |
_get_win_folder = _get_win_folder_with_jna | |
except ImportError: | |
_get_win_folder = _get_win_folder_from_registry | |
#---- self test code | |
if __name__ == "__main__": | |
appname = "MyApp" | |
appauthor = "MyCompany" | |
props = ("user_data_dir", "site_data_dir", | |
"user_config_dir", "site_config_dir", | |
"user_cache_dir", "user_log_dir") | |
print("-- app dirs (with optional 'version')") | |
dirs = AppDirs(appname, appauthor, version="1.0") | |
for prop in props: | |
print("%s: %s" % (prop, getattr(dirs, prop))) | |
print("\n-- app dirs (without optional 'version')") | |
dirs = AppDirs(appname, appauthor) | |
for prop in props: | |
print("%s: %s" % (prop, getattr(dirs, prop))) | |
print("\n-- app dirs (without optional 'appauthor')") | |
dirs = AppDirs(appname) | |
for prop in props: | |
print("%s: %s" % (prop, getattr(dirs, prop))) | |
print("\n-- app dirs (with disabled 'appauthor')") | |
dirs = AppDirs(appname, appauthor=False) | |
for prop in props: | |
print("%s: %s" % (prop, getattr(dirs, prop))) |
"""CacheControl import Interface. | |
Make it easy to import from cachecontrol without long namespaces. | |
""" | |
__author__ = 'Eric Larson' | |
__email__ = '[email protected]' | |
__version__ = '0.11.7' | |
from .wrapper import CacheControl | |
from .adapter import CacheControlAdapter | |
from .controller import CacheController |
import logging | |
from pip._vendor import requests | |
from pip._vendor.cachecontrol.adapter import CacheControlAdapter | |
from pip._vendor.cachecontrol.cache import DictCache | |
from pip._vendor.cachecontrol.controller import logger | |
from argparse import ArgumentParser | |
def setup_logging(): | |
logger.setLevel(logging.DEBUG) | |
handler = logging.StreamHandler() | |
logger.addHandler(handler) | |
def get_session(): | |
adapter = CacheControlAdapter( | |
DictCache(), | |
cache_etags=True, | |
serializer=None, | |
heuristic=None, | |
) | |
sess = requests.Session() | |
sess.mount('http://', adapter) | |
sess.mount('https://', adapter) | |
sess.cache_controller = adapter.controller | |
return sess | |
def get_args(): | |
parser = ArgumentParser() | |
parser.add_argument('url', help='The URL to try and cache') | |
return parser.parse_args() | |
def main(args=None): | |
args = get_args() | |
sess = get_session() | |
# Make a request to get a response | |
resp = sess.get(args.url) | |
# Turn on logging | |
setup_logging() | |
# try setting the cache | |
sess.cache_controller.cache_response(resp.request, resp.raw) | |
# Now try to get it | |
if sess.cache_controller.cached_request(resp.request): | |
print('Cached!') | |
else: | |
print('Not cached :(') | |
if __name__ == '__main__': | |
main() |
import types | |
import functools | |
from pip._vendor.requests.adapters import HTTPAdapter | |
from .controller import CacheController | |
from .cache import DictCache | |
from .filewrapper import CallbackFileWrapper | |
class CacheControlAdapter(HTTPAdapter): | |
invalidating_methods = set(['PUT', 'DELETE']) | |
def __init__(self, cache=None, | |
cache_etags=True, | |
controller_class=None, | |
serializer=None, | |
heuristic=None, | |
*args, **kw): | |
super(CacheControlAdapter, self).__init__(*args, **kw) | |
self.cache = cache or DictCache() | |
self.heuristic = heuristic | |
controller_factory = controller_class or CacheController | |
self.controller = controller_factory( | |
self.cache, | |
cache_etags=cache_etags, | |
serializer=serializer, | |
) | |
def send(self, request, **kw): | |
""" | |
Send a request. Use the request information to see if it | |
exists in the cache and cache the response if we need to and can. | |
""" | |
if request.method == 'GET': | |
cached_response = self.controller.cached_request(request) | |
if cached_response: | |
return self.build_response(request, cached_response, | |
from_cache=True) | |
# check for etags and add headers if appropriate | |
request.headers.update( | |
self.controller.conditional_headers(request) | |
) | |
resp = super(CacheControlAdapter, self).send(request, **kw) | |
return resp | |
def build_response(self, request, response, from_cache=False): | |
""" | |
Build a response by making a request or using the cache. | |
This will end up calling send and returning a potentially | |
cached response | |
""" | |
if not from_cache and request.method == 'GET': | |
# Check for any heuristics that might update headers | |
# before trying to cache. | |
if self.heuristic: | |
response = self.heuristic.apply(response) | |
# apply any expiration heuristics | |
if response.status == 304: | |
# We must have sent an ETag request. This could mean | |
# that we've been expired already or that we simply | |
# have an etag. In either case, we want to try and | |
# update the cache if that is the case. | |
cached_response = self.controller.update_cached_response( | |
request, response | |
) | |
if cached_response is not response: | |
from_cache = True | |
# We are done with the server response, read a | |
# possible response body (compliant servers will | |
# not return one, but we cannot be 100% sure) and | |
# release the connection back to the pool. | |
response.read(decode_content=False) | |
response.release_conn() | |
response = cached_response | |
# We always cache the 301 responses | |
elif response.status == 301: | |
self.controller.cache_response(request, response) | |
else: | |
# Wrap the response file with a wrapper that will cache the | |
# response when the stream has been consumed. | |
response._fp = CallbackFileWrapper( | |
response._fp, | |
functools.partial( | |
self.controller.cache_response, | |
request, | |
response, | |
) | |
) | |
if response.chunked: | |
super_update_chunk_length = response._update_chunk_length | |
def _update_chunk_length(self): | |
super_update_chunk_length() | |
if self.chunk_left == 0: | |
self._fp._close() | |
response._update_chunk_length = types.MethodType(_update_chunk_length, response) | |
resp = super(CacheControlAdapter, self).build_response( | |
request, response | |
) | |
# See if we should invalidate the cache. | |
if request.method in self.invalidating_methods and resp.ok: | |
cache_url = self.controller.cache_url(request.url) | |
self.cache.delete(cache_url) | |
# Give the request a from_cache attr to let people use it | |
resp.from_cache = from_cache | |
return resp | |
def close(self): | |
self.cache.close() | |
super(CacheControlAdapter, self).close() |
""" | |
The cache object API for implementing caches. The default is a thread | |
safe in-memory dictionary. | |
""" | |
from threading import Lock | |
class BaseCache(object): | |
def get(self, key):
raise NotImplementedError()
def set(self, key, value):
raise NotImplementedError()
def delete(self, key):
raise NotImplementedError()
def close(self): | |
pass | |
class DictCache(BaseCache): | |
def __init__(self, init_dict=None): | |
self.lock = Lock() | |
self.data = init_dict or {} | |
def get(self, key): | |
return self.data.get(key, None) | |
def set(self, key, value): | |
with self.lock: | |
self.data.update({key: value}) | |
def delete(self, key): | |
with self.lock: | |
if key in self.data: | |
self.data.pop(key) |
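# Illustrative sketch (not part of cachecontrol): the get/set/delete contract
# defined by BaseCache above, exercised against the thread-safe DictCache.
# The key and value are assumptions chosen for illustration.
def _example_dictcache_roundtrip():
    cache = DictCache()
    cache.set("http://example.com/", b"cached-bytes")
    hit = cache.get("http://example.com/")    # -> b'cached-bytes'
    cache.delete("http://example.com/")
    miss = cache.get("http://example.com/")   # -> None
    return hit, miss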
from textwrap import dedent | |
try: | |
from .file_cache import FileCache | |
except ImportError: | |
notice = dedent(''' | |
NOTE: In order to use the FileCache you must have | |
lockfile installed. You can install it via pip: | |
pip install lockfile | |
''') | |
print(notice) | |
try: | |
import redis | |
from .redis_cache import RedisCache | |
except ImportError: | |
pass |
import hashlib | |
import os | |
from pip._vendor.lockfile import LockFile | |
from pip._vendor.lockfile.mkdirlockfile import MkdirLockFile | |
from ..cache import BaseCache | |
from ..controller import CacheController | |
def _secure_open_write(filename, fmode): | |
# We only want to write to this file, so open it in write only mode | |
flags = os.O_WRONLY | |
# os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only | |
# will open *new* files. | |
# We specify this because we want to ensure that the mode we pass is the | |
# mode of the file. | |
flags |= os.O_CREAT | os.O_EXCL | |
# Do not follow symlinks to prevent someone from making a symlink that | |
# we follow and insecurely open a cache file. | |
if hasattr(os, "O_NOFOLLOW"): | |
flags |= os.O_NOFOLLOW | |
# On Windows we'll mark this file as binary | |
if hasattr(os, "O_BINARY"): | |
flags |= os.O_BINARY | |
# Before we open our file, we want to delete any existing file that is | |
# there | |
try: | |
os.remove(filename) | |
except (IOError, OSError): | |
# The file must not exist already, so we can just skip ahead to opening | |
pass | |
# Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a | |
# race condition happens between the os.remove and this line, that an | |
# error will be raised. Because we utilize a lockfile this should only | |
# happen if someone is attempting to attack us. | |
fd = os.open(filename, flags, fmode) | |
try: | |
return os.fdopen(fd, "wb") | |
except: | |
# An error occurred wrapping our FD in a file object | |
os.close(fd) | |
raise | |
class FileCache(BaseCache): | |
def __init__(self, directory, forever=False, filemode=0o0600, | |
dirmode=0o0700, use_dir_lock=None, lock_class=None): | |
if use_dir_lock is not None and lock_class is not None: | |
raise ValueError("Cannot use use_dir_lock and lock_class together") | |
if use_dir_lock: | |
lock_class = MkdirLockFile | |
if lock_class is None: | |
lock_class = LockFile | |
self.directory = directory | |
self.forever = forever | |
self.filemode = filemode | |
self.dirmode = dirmode | |
self.lock_class = lock_class | |
@staticmethod | |
def encode(x): | |
return hashlib.sha224(x.encode()).hexdigest() | |
def _fn(self, name): | |
# NOTE: This method should not change as some may depend on it. | |
# See: https://github.com/ionrock/cachecontrol/issues/63 | |
hashed = self.encode(name) | |
parts = list(hashed[:5]) + [hashed] | |
return os.path.join(self.directory, *parts) | |
def get(self, key): | |
name = self._fn(key) | |
if not os.path.exists(name): | |
return None | |
with open(name, 'rb') as fh: | |
return fh.read() | |
def set(self, key, value): | |
name = self._fn(key) | |
# Make sure the directory exists | |
try: | |
os.makedirs(os.path.dirname(name), self.dirmode) | |
except (IOError, OSError): | |
pass | |
with self.lock_class(name) as lock: | |
# Write our actual file | |
with _secure_open_write(lock.path, self.filemode) as fh: | |
fh.write(value) | |
def delete(self, key): | |
name = self._fn(key) | |
if not self.forever: | |
os.remove(name) | |
def url_to_file_path(url, filecache): | |
"""Return the file cache path based on the URL. | |
This does not ensure the file exists! | |
""" | |
key = CacheController.cache_url(url) | |
return filecache._fn(key) |
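# Illustrative sketch (not part of cachecontrol): persisting cached responses
# on disk by handing a FileCache to the CacheControl session wrapper defined
# elsewhere in this vendored package. The '.web_cache' directory name is an
# assumption for illustration.
def _example_filecache_session():
    from pip._vendor import requests
    from pip._vendor.cachecontrol import CacheControl
    sess = CacheControl(requests.Session(), cache=FileCache(".web_cache"))
    return sess   # GETs made with this session may be cached under .web_cache/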
from __future__ import division | |
from datetime import datetime | |
def total_seconds(td): | |
"""Python 2.6 compatability""" | |
if hasattr(td, 'total_seconds'): | |
return td.total_seconds() | |
ms = td.microseconds | |
secs = (td.seconds + td.days * 24 * 3600) | |
return (ms + secs * 10**6) / 10**6 | |
class RedisCache(object): | |
def __init__(self, conn): | |
self.conn = conn | |
def get(self, key): | |
return self.conn.get(key) | |
def set(self, key, value, expires=None): | |
if not expires: | |
self.conn.set(key, value) | |
else: | |
expires = expires - datetime.now() | |
self.conn.setex(key, total_seconds(expires), value) | |
def delete(self, key): | |
self.conn.delete(key) | |
def clear(self): | |
"""Helper for clearing all the keys in a database. Use with | |
caution!""" | |
for key in self.conn.keys(): | |
self.conn.delete(key) | |
def close(self): | |
self.conn.disconnect() |
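# Illustrative sketch (not part of cachecontrol): backing the cache with Redis.
# Requires the optional redis-py package; the connection URL is an assumption
# for illustration.
def _example_redis_cache():
    import redis
    conn = redis.from_url("redis://localhost:6379/0")
    cache = RedisCache(conn)
    cache.set("key", b"value")           # stored without an expiry
    return cache.get("key")              # -> b'value' (bytes, as stored by redis-py)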
try: | |
from urllib.parse import urljoin | |
except ImportError: | |
from urlparse import urljoin | |
try: | |
import cPickle as pickle | |
except ImportError: | |
import pickle | |
from pip._vendor.requests.packages.urllib3.response import HTTPResponse | |
from pip._vendor.requests.packages.urllib3.util import is_fp_closed | |
# Replicate some six behaviour | |
try: | |
text_type = (unicode,) | |
except NameError: | |
text_type = (str,) |
""" | |
The httplib2 algorithms ported for use with requests. | |
""" | |
import logging | |
import re | |
import calendar | |
import time | |
from email.utils import parsedate_tz | |
from pip._vendor.requests.structures import CaseInsensitiveDict | |
from .cache import DictCache | |
from .serialize import Serializer | |
logger = logging.getLogger(__name__) | |
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") | |
def parse_uri(uri): | |
"""Parses a URI using the regex given in Appendix B of RFC 3986. | |
(scheme, authority, path, query, fragment) = parse_uri(uri) | |
""" | |
groups = URI.match(uri).groups() | |
return (groups[1], groups[3], groups[4], groups[6], groups[8]) | |
class CacheController(object): | |
"""An interface to see if request should cached or not. | |
""" | |
def __init__(self, cache=None, cache_etags=True, serializer=None): | |
self.cache = cache or DictCache() | |
self.cache_etags = cache_etags | |
self.serializer = serializer or Serializer() | |
@classmethod | |
def _urlnorm(cls, uri): | |
"""Normalize the URL to create a safe key for the cache""" | |
(scheme, authority, path, query, fragment) = parse_uri(uri) | |
if not scheme or not authority: | |
raise Exception("Only absolute URIs are allowed. uri = %s" % uri) | |
scheme = scheme.lower() | |
authority = authority.lower() | |
if not path: | |
path = "/" | |
# Could do syntax based normalization of the URI before | |
# computing the digest. See Section 6.2.2 of Std 66. | |
request_uri = query and "?".join([path, query]) or path | |
defrag_uri = scheme + "://" + authority + request_uri | |
return defrag_uri | |
@classmethod | |
def cache_url(cls, uri): | |
return cls._urlnorm(uri) | |
def parse_cache_control(self, headers): | |
""" | |
Parse the cache control headers returning a dictionary with values | |
for the different directives. | |
""" | |
retval = {} | |
cc_header = 'cache-control' | |
if 'Cache-Control' in headers: | |
cc_header = 'Cache-Control' | |
if cc_header in headers: | |
parts = headers[cc_header].split(',') | |
parts_with_args = [ | |
tuple([x.strip().lower() for x in part.split("=", 1)]) | |
for part in parts if -1 != part.find("=") | |
] | |
parts_wo_args = [ | |
(name.strip().lower(), 1) | |
for name in parts if -1 == name.find("=") | |
] | |
retval = dict(parts_with_args + parts_wo_args) | |
return retval | |
def cached_request(self, request): | |
""" | |
Return a cached response if it exists in the cache, otherwise | |
return False. | |
""" | |
cache_url = self.cache_url(request.url) | |
logger.debug('Looking up "%s" in the cache', cache_url) | |
cc = self.parse_cache_control(request.headers) | |
# Bail out if the request insists on fresh data | |
if 'no-cache' in cc: | |
logger.debug('Request header has "no-cache", cache bypassed') | |
return False | |
# parse_cache_control() returns directive values as strings, so compare
# against '0' rather than the integer 0.
if 'max-age' in cc and cc['max-age'] == '0':
logger.debug('Request header has "max-age" as 0, cache bypassed')
return False | |
# Request allows serving from the cache, let's see if we find something | |
cache_data = self.cache.get(cache_url) | |
if cache_data is None: | |
logger.debug('No cache entry available') | |
return False | |
# Check whether it can be deserialized | |
resp = self.serializer.loads(request, cache_data) | |
if not resp: | |
logger.warning('Cache entry deserialization failed, entry ignored') | |
return False | |
# If we have a cached 301, return it immediately. We don't | |
# need to test our response for other headers b/c it is | |
# intrinsically "cacheable" as it is Permanent. | |
# See: | |
# https://tools.ietf.org/html/rfc7231#section-6.4.2 | |
# | |
# Client can try to refresh the value by repeating the request | |
# with cache busting headers as usual (ie no-cache). | |
if resp.status == 301: | |
msg = ('Returning cached "301 Moved Permanently" response ' | |
'(ignoring date and etag information)') | |
logger.debug(msg) | |
return resp | |
headers = CaseInsensitiveDict(resp.headers) | |
if not headers or 'date' not in headers: | |
if 'etag' not in headers: | |
# Without date or etag, the cached response can never be used | |
# and should be deleted. | |
logger.debug('Purging cached response: no date or etag') | |
self.cache.delete(cache_url) | |
logger.debug('Ignoring cached response: no date') | |
return False | |
now = time.time() | |
date = calendar.timegm( | |
parsedate_tz(headers['date']) | |
) | |
current_age = max(0, now - date) | |
logger.debug('Current age based on date: %i', current_age) | |
# TODO: There is an assumption that the result will be a | |
# urllib3 response object. This may not be best since we | |
# could probably avoid instantiating or constructing the | |
# response until we know we need it. | |
resp_cc = self.parse_cache_control(headers) | |
# determine freshness | |
freshness_lifetime = 0 | |
# Check the max-age pragma in the cache control header | |
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit(): | |
freshness_lifetime = int(resp_cc['max-age']) | |
logger.debug('Freshness lifetime from max-age: %i', | |
freshness_lifetime) | |
# If there isn't a max-age, check for an expires header | |
elif 'expires' in headers: | |
expires = parsedate_tz(headers['expires']) | |
if expires is not None: | |
expire_time = calendar.timegm(expires) - date | |
freshness_lifetime = max(0, expire_time) | |
logger.debug("Freshness lifetime from expires: %i", | |
freshness_lifetime) | |
# Determine if we are setting freshness limit in the | |
# request. Note, this overrides what was in the response. | |
if 'max-age' in cc: | |
try: | |
freshness_lifetime = int(cc['max-age']) | |
logger.debug('Freshness lifetime from request max-age: %i', | |
freshness_lifetime) | |
except ValueError: | |
freshness_lifetime = 0 | |
if 'min-fresh' in cc: | |
try: | |
min_fresh = int(cc['min-fresh']) | |
except ValueError: | |
min_fresh = 0 | |
# adjust our current age by our min fresh | |
current_age += min_fresh | |
logger.debug('Adjusted current age from min-fresh: %i', | |
current_age) | |
# Return entry if it is fresh enough | |
if freshness_lifetime > current_age: | |
logger.debug('The response is "fresh", returning cached response') | |
logger.debug('%i > %i', freshness_lifetime, current_age) | |
return resp | |
# we're not fresh. If we don't have an Etag, clear it out | |
if 'etag' not in headers: | |
logger.debug( | |
'The cached response is "stale" with no etag, purging' | |
) | |
self.cache.delete(cache_url) | |
# return the original handler | |
return False | |
def conditional_headers(self, request): | |
cache_url = self.cache_url(request.url) | |
resp = self.serializer.loads(request, self.cache.get(cache_url)) | |
new_headers = {} | |
if resp: | |
headers = CaseInsensitiveDict(resp.headers) | |
if 'etag' in headers: | |
new_headers['If-None-Match'] = headers['ETag'] | |
if 'last-modified' in headers: | |
new_headers['If-Modified-Since'] = headers['Last-Modified'] | |
return new_headers | |
def cache_response(self, request, response, body=None): | |
""" | |
Algorithm for caching requests. | |
This assumes a requests Response object. | |
""" | |
# From httplib2: Don't cache 206's since we aren't going to | |
# handle byte range requests | |
cacheable_status_codes = [200, 203, 300, 301] | |
if response.status not in cacheable_status_codes: | |
logger.debug( | |
'Status code %s not in %s', | |
response.status, | |
cacheable_status_codes | |
) | |
return | |
response_headers = CaseInsensitiveDict(response.headers) | |
# If we've been given a body, and our response has a valid
# Content-Length, then we can check whether the body we've been given
# matches the expected size; if it doesn't, we'll just skip trying to
# cache it.
if (body is not None and | |
"content-length" in response_headers and | |
response_headers["content-length"].isdigit() and | |
int(response_headers["content-length"]) != len(body)): | |
return | |
cc_req = self.parse_cache_control(request.headers) | |
cc = self.parse_cache_control(response_headers) | |
cache_url = self.cache_url(request.url) | |
logger.debug('Updating cache with response from "%s"', cache_url) | |
# Delete it from the cache if we happen to have it stored there | |
no_store = False | |
if cc.get('no-store'): | |
no_store = True | |
logger.debug('Response header has "no-store"') | |
if cc_req.get('no-store'): | |
no_store = True | |
logger.debug('Request header has "no-store"') | |
if no_store and self.cache.get(cache_url): | |
logger.debug('Purging existing cache entry to honor "no-store"') | |
self.cache.delete(cache_url) | |
# If we've been given an etag, then keep the response | |
if self.cache_etags and 'etag' in response_headers: | |
logger.debug('Caching due to etag') | |
self.cache.set( | |
cache_url, | |
self.serializer.dumps(request, response, body=body), | |
) | |
# Add any 301s to the cache. We do this before looking at
# the Date headers.
elif response.status == 301: | |
logger.debug('Caching permanent redirect')
self.cache.set( | |
cache_url, | |
self.serializer.dumps(request, response) | |
) | |
# Add to the cache if the response headers demand it. If there | |
# is no date header then we can't do anything about expiring | |
# the cache. | |
elif 'date' in response_headers: | |
# cache when there is a max-age > 0 | |
if cc and cc.get('max-age'): | |
if cc['max-age'].isdigit() and int(cc['max-age']) > 0: | |
logger.debug('Caching b/c date exists and max-age > 0') | |
self.cache.set( | |
cache_url, | |
self.serializer.dumps(request, response, body=body), | |
) | |
# If the request can expire, it means we should cache it | |
# in the meantime. | |
elif 'expires' in response_headers: | |
if response_headers['expires']: | |
logger.debug('Caching b/c of expires header') | |
self.cache.set( | |
cache_url, | |
self.serializer.dumps(request, response, body=body), | |
) | |
def update_cached_response(self, request, response): | |
"""On a 304 we will get a new set of headers that we want to | |
update our cached value with, assuming we have one. | |
This should only ever be called when we've sent an ETag and | |
gotten a 304 as the response. | |
""" | |
cache_url = self.cache_url(request.url) | |
cached_response = self.serializer.loads( | |
request, | |
self.cache.get(cache_url) | |
) | |
if not cached_response: | |
# we didn't have a cached response | |
return response | |
# Let's update our headers with the headers from the new response:
# http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1 | |
# | |
# The server isn't supposed to send headers that would make | |
# the cached body invalid. But... just in case, we'll be sure | |
# to strip out ones we know that might be problematic due to
# typical assumptions. | |
excluded_headers = [ | |
"content-length", | |
] | |
cached_response.headers.update( | |
dict((k, v) for k, v in response.headers.items() | |
if k.lower() not in excluded_headers) | |
) | |
# we want a 200 b/c we have content via the cache | |
cached_response.status = 200 | |
# update our cache | |
self.cache.set( | |
cache_url, | |
self.serializer.dumps(request, cached_response), | |
) | |
return cached_response |
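# Illustrative sketch (not part of cachecontrol): what the two small helper
# methods on CacheController return for a typical input. The URL and header
# values are assumptions chosen for illustration.
def _example_controller_helpers():
    controller = CacheController()
    key = controller.cache_url("https://Example.COM/path?q=1#frag")
    # -> 'https://example.com/path?q=1' (scheme/host lowercased, fragment dropped)
    directives = controller.parse_cache_control(
        {"cache-control": "max-age=3600, no-store"}
    )
    # -> {'max-age': '3600', 'no-store': 1}
    return key, directives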
from io import BytesIO | |
class CallbackFileWrapper(object): | |
""" | |
Small wrapper around a fp object which will tee everything read into a | |
buffer, and when that file is closed it will execute a callback with the | |
contents of that buffer. | |
All attributes are proxied to the underlying file object. | |
This class uses members with a double underscore (__) leading prefix so as | |
not to accidentally shadow an attribute. | |
""" | |
def __init__(self, fp, callback): | |
self.__buf = BytesIO() | |
self.__fp = fp | |
self.__callback = callback | |
def __getattr__(self, name): | |
# The vagaries of garbage collection mean that self.__fp is
# not always set. Using __getattribute__ with the mangled private
# name[0] lets us look up the attribute value and raise an
# AttributeError when it doesn't exist. This stops things from
# infinitely recursing calls to getattr in the case where
# self.__fp hasn't been set.
# | |
# [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers | |
fp = self.__getattribute__('_CallbackFileWrapper__fp') | |
return getattr(fp, name) | |
def __is_fp_closed(self): | |
try: | |
return self.__fp.fp is None | |
except AttributeError: | |
pass | |
try: | |
return self.__fp.closed | |
except AttributeError: | |
pass | |
# We just don't cache it then. | |
# TODO: Add some logging here... | |
return False | |
def _close(self): | |
if self.__callback: | |
self.__callback(self.__buf.getvalue()) | |
# We assign this to None here, because otherwise we can get into | |
# really tricky problems where the CPython interpreter deadlocks
# because the callback is holding a reference to something which | |
# has a __del__ method. Setting this to None breaks the cycle | |
# and allows the garbage collector to do its thing normally.
self.__callback = None | |
def read(self, amt=None): | |
data = self.__fp.read(amt) | |
self.__buf.write(data) | |
if self.__is_fp_closed(): | |
self._close() | |
return data | |
def _safe_read(self, amt): | |
data = self.__fp._safe_read(amt) | |
if amt == 2 and data == b'\r\n': | |
# urllib executes this read to toss the CRLF at the end | |
# of the chunk. | |
return data | |
self.__buf.write(data) | |
if self.__is_fp_closed(): | |
self._close() | |
return data |
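# Illustrative sketch (not part of cachecontrol): CallbackFileWrapper tees every
# read into a buffer and fires the callback once the underlying fp looks closed.
# _FakeBody below is a made-up stand-in for the sliver of urllib3 behaviour the
# wrapper relies on (an .fp attribute that becomes None once the body is spent).
class _FakeBody(object):
    def __init__(self, data):
        self._data = data
        self.fp = object()        # non-None means "still open"
    def read(self, amt=None):
        out, self._data = self._data, b""
        if not out:
            self.fp = None        # signal EOF the way the wrapper expects
        return out

def _example_callback_wrapper():
    captured = []
    wrapper = CallbackFileWrapper(_FakeBody(b"hello world"), captured.append)
    first = wrapper.read()        # returns b'hello world' and tees it into the buffer
    wrapper.read()                # EOF read: fp is now None, so the callback fires
    return first, captured        # -> (b'hello world', [b'hello world'])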
import calendar | |
import time | |
from email.utils import formatdate, parsedate, parsedate_tz | |
from datetime import datetime, timedelta | |
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT" | |
def expire_after(delta, date=None): | |
date = date or datetime.now() | |
return date + delta | |
def datetime_to_header(dt): | |
return formatdate(calendar.timegm(dt.timetuple())) | |
class BaseHeuristic(object): | |
def warning(self, response): | |
""" | |
Return a valid 1xx warning header value describing the cache | |
adjustments. | |
The response is provided to allow warnings like 113
http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need | |
to explicitly say response is over 24 hours old. | |
""" | |
return '110 - "Response is Stale"' | |
def update_headers(self, response): | |
"""Update the response headers with any new headers. | |
NOTE: This SHOULD always include some Warning header to | |
signify that the response was cached by the client, not | |
by way of the provided headers. | |
""" | |
return {} | |
def apply(self, response): | |
updated_headers = self.update_headers(response) | |
if updated_headers: | |
response.headers.update(updated_headers) | |
warning_header_value = self.warning(response) | |
if warning_header_value is not None: | |
response.headers.update({'Warning': warning_header_value}) | |
return response | |
class OneDayCache(BaseHeuristic): | |
""" | |
Cache the response by providing an Expires header 1 day in the
future.
""" | |
def update_headers(self, response): | |
headers = {} | |
if 'expires' not in response.headers: | |
date = parsedate(response.headers['date']) | |
expires = expire_after(timedelta(days=1), | |
date=datetime(*date[:6])) | |
headers['expires'] = datetime_to_header(expires) | |
headers['cache-control'] = 'public' | |
return headers | |
class ExpiresAfter(BaseHeuristic): | |
""" | |
Cache **all** requests for a defined time period. | |
""" | |
def __init__(self, **kw): | |
self.delta = timedelta(**kw) | |
def update_headers(self, response): | |
expires = expire_after(self.delta) | |
return { | |
'expires': datetime_to_header(expires), | |
'cache-control': 'public', | |
} | |
def warning(self, response): | |
tmpl = '110 - Automatically cached for %s. Response might be stale' | |
return tmpl % self.delta | |
class LastModified(BaseHeuristic): | |
""" | |
If there is no Expires header already, fall back on Last-Modified | |
using the heuristic from | |
http://tools.ietf.org/html/rfc7234#section-4.2.2 | |
to calculate a reasonable value. | |
Firefox also does something like this per | |
https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ | |
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397 | |
Unlike mozilla we limit this to 24-hr. | |
""" | |
cacheable_by_default_statuses = set([ | |
200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501 | |
]) | |
def update_headers(self, resp): | |
headers = resp.headers | |
if 'expires' in headers: | |
return {} | |
if 'cache-control' in headers and headers['cache-control'] != 'public': | |
return {} | |
if resp.status not in self.cacheable_by_default_statuses: | |
return {} | |
if 'date' not in headers or 'last-modified' not in headers: | |
return {} | |
date = calendar.timegm(parsedate_tz(headers['date'])) | |
last_modified = parsedate(headers['last-modified']) | |
if date is None or last_modified is None: | |
return {} | |
now = time.time() | |
current_age = max(0, now - date) | |
delta = date - calendar.timegm(last_modified) | |
freshness_lifetime = max(0, min(delta / 10, 24 * 3600)) | |
if freshness_lifetime <= current_age: | |
return {} | |
expires = date + freshness_lifetime | |
return {'expires': time.strftime(TIME_FMT, time.gmtime(expires))} | |
def warning(self, resp): | |
return None |
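# Illustrative sketch (not part of cachecontrol): attaching one of the
# heuristics above to a requests Session through the CacheControl wrapper from
# this vendored package, so responses without caching headers still get cached
# for a fixed window.
def _example_expires_after_session():
    from pip._vendor import requests
    from pip._vendor.cachecontrol import CacheControl
    sess = CacheControl(requests.Session(), heuristic=ExpiresAfter(days=1))
    return sess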
import base64 | |
import io | |
import json | |
import zlib | |
from pip._vendor.requests.structures import CaseInsensitiveDict | |
from .compat import HTTPResponse, pickle, text_type | |
def _b64_encode_bytes(b): | |
return base64.b64encode(b).decode("ascii") | |
def _b64_encode_str(s): | |
return _b64_encode_bytes(s.encode("utf8")) | |
def _b64_encode(s): | |
if isinstance(s, text_type): | |
return _b64_encode_str(s) | |
return _b64_encode_bytes(s) | |
def _b64_decode_bytes(b): | |
return base64.b64decode(b.encode("ascii")) | |
def _b64_decode_str(s): | |
return _b64_decode_bytes(s).decode("utf8") | |
class Serializer(object): | |
def dumps(self, request, response, body=None): | |
response_headers = CaseInsensitiveDict(response.headers) | |
if body is None: | |
body = response.read(decode_content=False) | |
# NOTE: 99% sure this is dead code. I'm only leaving it | |
# here b/c I don't have a test yet to prove | |
# it. Basically, before using | |
# `cachecontrol.filewrapper.CallbackFileWrapper`, | |
# this made an effort to reset the file handle. The | |
# `CallbackFileWrapper` short circuits this code by | |
# setting the body as the content is consumed, the | |
# result being a `body` argument is *always* passed | |
# into cache_response, and in turn, | |
# `Serializer.dumps`.
response._fp = io.BytesIO(body) | |
data = { | |
"response": { | |
"body": _b64_encode_bytes(body), | |
"headers": dict( | |
(_b64_encode(k), _b64_encode(v)) | |
for k, v in response.headers.items() | |
), | |
"status": response.status, | |
"version": response.version, | |
"reason": _b64_encode_str(response.reason), | |
"strict": response.strict, | |
"decode_content": response.decode_content, | |
}, | |
} | |
# Construct our vary headers | |
data["vary"] = {} | |
if "vary" in response_headers: | |
varied_headers = response_headers['vary'].split(',') | |
for header in varied_headers: | |
header = header.strip() | |
data["vary"][header] = request.headers.get(header, None) | |
# Encode our Vary headers to ensure they can be serialized as JSON | |
data["vary"] = dict( | |
(_b64_encode(k), _b64_encode(v) if v is not None else v) | |
for k, v in data["vary"].items() | |
) | |
return b",".join([ | |
b"cc=2", | |
zlib.compress( | |
json.dumps( | |
data, separators=(",", ":"), sort_keys=True, | |
).encode("utf8"), | |
), | |
]) | |
def loads(self, request, data): | |
# Short circuit if we've been given an empty set of data | |
if not data: | |
return | |
# Determine what version of the serializer the data was serialized | |
# with | |
try: | |
ver, data = data.split(b",", 1) | |
except ValueError: | |
ver = b"cc=0" | |
# Make sure that our "ver" is actually a version and isn't a false | |
# positive from a , being in the data stream. | |
if ver[:3] != b"cc=": | |
data = ver + data | |
ver = b"cc=0" | |
# Get the version number out of the cc=N | |
ver = ver.split(b"=", 1)[-1].decode("ascii") | |
# Dispatch to the actual load method for the given version | |
try: | |
return getattr(self, "_loads_v{0}".format(ver))(request, data) | |
except AttributeError: | |
# This is a version we don't have a loads function for, so we'll | |
# just treat it as a miss and return None | |
return | |
def prepare_response(self, request, cached): | |
"""Verify our vary headers match and construct a real urllib3 | |
HTTPResponse object. | |
""" | |
# Special case the '*' Vary value as it means we cannot actually | |
# determine if the cached response is suitable for this request. | |
if "*" in cached.get("vary", {}): | |
return | |
# Ensure that the Vary headers for the cached response match our | |
# request | |
for header, value in cached.get("vary", {}).items(): | |
if request.headers.get(header, None) != value: | |
return | |
body_raw = cached["response"].pop("body") | |
headers = CaseInsensitiveDict(data=cached['response']['headers']) | |
if headers.get('transfer-encoding', '') == 'chunked': | |
headers.pop('transfer-encoding') | |
cached['response']['headers'] = headers | |
try: | |
body = io.BytesIO(body_raw) | |
except TypeError: | |
# This can happen if cachecontrol serialized to v1 format (pickle) | |
# using Python 2. A Python 2 str(byte string) will be unpickled as | |
# a Python 3 str (unicode string), which will cause the above to | |
# fail with: | |
# | |
# TypeError: 'str' does not support the buffer interface | |
body = io.BytesIO(body_raw.encode('utf8')) | |
return HTTPResponse( | |
body=body, | |
preload_content=False, | |
**cached["response"] | |
) | |
def _loads_v0(self, request, data): | |
# The original legacy cache data. This doesn't contain enough | |
# information to construct everything we need, so we'll treat this as | |
# a miss. | |
return | |
def _loads_v1(self, request, data): | |
try: | |
cached = pickle.loads(data) | |
except ValueError: | |
return | |
return self.prepare_response(request, cached) | |
def _loads_v2(self, request, data): | |
try: | |
cached = json.loads(zlib.decompress(data).decode("utf8")) | |
except ValueError: | |
return | |
# We need to decode the items that we've base64 encoded | |
cached["response"]["body"] = _b64_decode_bytes( | |
cached["response"]["body"] | |
) | |
cached["response"]["headers"] = dict( | |
(_b64_decode_str(k), _b64_decode_str(v)) | |
for k, v in cached["response"]["headers"].items() | |
) | |
cached["response"]["reason"] = _b64_decode_str( | |
cached["response"]["reason"], | |
) | |
cached["vary"] = dict( | |
(_b64_decode_str(k), _b64_decode_str(v) if v is not None else v) | |
for k, v in cached["vary"].items() | |
) | |
return self.prepare_response(request, cached) |
from .adapter import CacheControlAdapter | |
from .cache import DictCache | |
def CacheControl(sess, | |
cache=None, | |
cache_etags=True, | |
serializer=None, | |
heuristic=None): | |
cache = cache or DictCache() | |
adapter = CacheControlAdapter( | |
cache, | |
cache_etags=cache_etags, | |
serializer=serializer, | |
heuristic=heuristic, | |
) | |
sess.mount('http://', adapter) | |
sess.mount('https://', adapter) | |
return sess |
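# Illustrative sketch (not part of cachecontrol): the typical way this wrapper
# is used -- patch a plain requests Session so repeated GETs can be answered
# from the in-memory DictCache. The URL is an assumption for illustration.
def _example_cachecontrol_session():
    from pip._vendor import requests
    sess = CacheControl(requests.Session())
    first = sess.get("https://example.com/")     # goes to the network, may be cached
    second = sess.get("https://example.com/")    # may be served from the cache
    return first.from_cache, second.from_cache   # the adapter sets .from_cache on responses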
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. | |
from .initialise import init, deinit, reinit, colorama_text | |
from .ansi import Fore, Back, Style, Cursor | |
from .ansitowin32 import AnsiToWin32 | |
__version__ = '0.3.7' | |
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. | |
''' | |
This module generates ANSI character codes for printing colors to terminals.
See: http://en.wikipedia.org/wiki/ANSI_escape_code | |
''' | |
CSI = '\033[' | |
OSC = '\033]' | |
BEL = '\007' | |
def code_to_chars(code): | |
return CSI + str(code) + 'm' | |
def set_title(title): | |
return OSC + '2;' + title + BEL | |
def clear_screen(mode=2): | |
return CSI + str(mode) + 'J' | |
def clear_line(mode=2): | |
return CSI + str(mode) + 'K' | |
class AnsiCodes(object): | |
def __init__(self): | |
# the subclasses declare class attributes which are numbers. | |
# Upon instantiation we define instance attributes, which are the same | |
# as the class attributes but wrapped with the ANSI escape sequence | |
for name in dir(self): | |
if not name.startswith('_'): | |
value = getattr(self, name) | |
setattr(self, name, code_to_chars(value)) | |
class AnsiCursor(object): | |
def UP(self, n=1): | |
return CSI + str(n) + 'A' | |
def DOWN(self, n=1): | |
return CSI + str(n) + 'B' | |
def FORWARD(self, n=1): | |
return CSI + str(n) + 'C' | |
def BACK(self, n=1): | |
return CSI + str(n) + 'D' | |
def POS(self, x=1, y=1): | |
return CSI + str(y) + ';' + str(x) + 'H' | |
class AnsiFore(AnsiCodes): | |
BLACK = 30 | |
RED = 31 | |
GREEN = 32 | |
YELLOW = 33 | |
BLUE = 34 | |
MAGENTA = 35 | |
CYAN = 36 | |
WHITE = 37 | |
RESET = 39 | |
# These are fairly well supported, but not part of the standard. | |
LIGHTBLACK_EX = 90 | |
LIGHTRED_EX = 91 | |
LIGHTGREEN_EX = 92 | |
LIGHTYELLOW_EX = 93 | |
LIGHTBLUE_EX = 94 | |
LIGHTMAGENTA_EX = 95 | |
LIGHTCYAN_EX = 96 | |
LIGHTWHITE_EX = 97 | |
class AnsiBack(AnsiCodes): | |
BLACK = 40 | |
RED = 41 | |
GREEN = 42 | |
YELLOW = 43 | |
BLUE = 44 | |
MAGENTA = 45 | |
CYAN = 46 | |
WHITE = 47 | |
RESET = 49 | |
# These are fairly well supported, but not part of the standard. | |
LIGHTBLACK_EX = 100 | |
LIGHTRED_EX = 101 | |
LIGHTGREEN_EX = 102 | |
LIGHTYELLOW_EX = 103 | |
LIGHTBLUE_EX = 104 | |
LIGHTMAGENTA_EX = 105 | |
LIGHTCYAN_EX = 106 | |
LIGHTWHITE_EX = 107 | |
class AnsiStyle(AnsiCodes): | |
BRIGHT = 1 | |
DIM = 2 | |
NORMAL = 22 | |
RESET_ALL = 0 | |
Fore = AnsiFore() | |
Back = AnsiBack() | |
Style = AnsiStyle() | |
Cursor = AnsiCursor() |
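# Illustrative sketch (not part of colorama): the Fore/Back/Style singletons
# above hold plain ANSI escape strings, so colouring text is just string
# concatenation. The message below is an assumption for illustration.
def _example_ansi_constants():
    line = Fore.RED + Style.BRIGHT + "error:" + Style.RESET_ALL + " something failed"
    return line   # e.g. '\x1b[31m\x1b[1merror:\x1b[0m something failed'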
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. | |
import re | |
import sys | |
import os | |
from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style | |
from .winterm import WinTerm, WinColor, WinStyle | |
from .win32 import windll, winapi_test | |
winterm = None | |
if windll is not None: | |
winterm = WinTerm() | |
def is_stream_closed(stream): | |
return not hasattr(stream, 'closed') or stream.closed | |
def is_a_tty(stream): | |
return hasattr(stream, 'isatty') and stream.isatty() | |
class StreamWrapper(object): | |
''' | |
Wraps a stream (such as stdout), acting as a transparent proxy for all | |
attribute access apart from method 'write()', which is delegated to our | |
Converter instance. | |
''' | |
def __init__(self, wrapped, converter): | |
# double-underscore everything to prevent clashes with names of | |
# attributes on the wrapped stream object. | |
self.__wrapped = wrapped | |
self.__convertor = converter | |
def __getattr__(self, name): | |
return getattr(self.__wrapped, name) | |
def write(self, text): | |
self.__convertor.write(text) | |
class AnsiToWin32(object): | |
''' | |
Implements a 'write()' method which, on Windows, will strip ANSI character | |
sequences from the text, and if outputting to a tty, will convert them into | |
win32 function calls. | |
''' | |
ANSI_CSI_RE = re.compile('\001?\033\[((?:\d|;)*)([a-zA-Z])\002?') # Control Sequence Introducer | |
ANSI_OSC_RE = re.compile('\001?\033\]((?:.|;)*?)(\x07)\002?') # Operating System Command | |
def __init__(self, wrapped, convert=None, strip=None, autoreset=False): | |
# The wrapped stream (normally sys.stdout or sys.stderr) | |
self.wrapped = wrapped | |
# should we reset colors to defaults after every .write() | |
self.autoreset = autoreset | |
# create the proxy wrapping our output stream | |
self.stream = StreamWrapper(wrapped, self) | |
on_windows = os.name == 'nt' | |
# We test if the WinAPI works, because even if we are on Windows | |
# we may be using a terminal that doesn't support the WinAPI | |
# (e.g. Cygwin Terminal). In this case it's up to the terminal | |
# to support the ANSI codes. | |
conversion_supported = on_windows and winapi_test() | |
# should we strip ANSI sequences from our output? | |
if strip is None: | |
strip = conversion_supported or (not is_stream_closed(wrapped) and not is_a_tty(wrapped)) | |
self.strip = strip | |
# should we convert ANSI sequences into win32 calls?
if convert is None: | |
convert = conversion_supported and not is_stream_closed(wrapped) and is_a_tty(wrapped) | |
self.convert = convert | |
# dict of ansi codes to win32 functions and parameters | |
self.win32_calls = self.get_win32_calls() | |
# are we wrapping stderr? | |
self.on_stderr = self.wrapped is sys.stderr | |
def should_wrap(self): | |
''' | |
True if this class is actually needed. If false, then the output | |
stream will not be affected, nor will win32 calls be issued, so | |
wrapping stdout is not actually required. This will generally be | |
False on non-Windows platforms, unless optional functionality like | |
autoreset has been requested using kwargs to init() | |
''' | |
return self.convert or self.strip or self.autoreset | |
def get_win32_calls(self): | |
if self.convert and winterm: | |
return { | |
AnsiStyle.RESET_ALL: (winterm.reset_all, ), | |
AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT), | |
AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL), | |
AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL), | |
AnsiFore.BLACK: (winterm.fore, WinColor.BLACK), | |
AnsiFore.RED: (winterm.fore, WinColor.RED), | |
AnsiFore.GREEN: (winterm.fore, WinColor.GREEN), | |
AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW), | |
AnsiFore.BLUE: (winterm.fore, WinColor.BLUE), | |
AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA), | |
AnsiFore.CYAN: (winterm.fore, WinColor.CYAN), | |
AnsiFore.WHITE: (winterm.fore, WinColor.GREY), | |
AnsiFore.RESET: (winterm.fore, ), | |
AnsiFore.LIGHTBLACK_EX: (winterm.fore, WinColor.BLACK, True), | |
AnsiFore.LIGHTRED_EX: (winterm.fore, WinColor.RED, True), | |
AnsiFore.LIGHTGREEN_EX: (winterm.fore, WinColor.GREEN, True), | |
AnsiFore.LIGHTYELLOW_EX: (winterm.fore, WinColor.YELLOW, True), | |
AnsiFore.LIGHTBLUE_EX: (winterm.fore, WinColor.BLUE, True), | |
AnsiFore.LIGHTMAGENTA_EX: (winterm.fore, WinColor.MAGENTA, True), | |
AnsiFore.LIGHTCYAN_EX: (winterm.fore, WinColor.CYAN, True), | |
AnsiFore.LIGHTWHITE_EX: (winterm.fore, WinColor.GREY, True), | |
AnsiBack.BLACK: (winterm.back, WinColor.BLACK), | |
AnsiBack.RED: (winterm.back, WinColor.RED), | |
AnsiBack.GREEN: (winterm.back, WinColor.GREEN), | |
AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW), | |
AnsiBack.BLUE: (winterm.back, WinColor.BLUE), | |
AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA), | |
AnsiBack.CYAN: (winterm.back, WinColor.CYAN), | |
AnsiBack.WHITE: (winterm.back, WinColor.GREY), | |
AnsiBack.RESET: (winterm.back, ), | |
AnsiBack.LIGHTBLACK_EX: (winterm.back, WinColor.BLACK, True), | |
AnsiBack.LIGHTRED_EX: (winterm.back, WinColor.RED, True), | |
AnsiBack.LIGHTGREEN_EX: (winterm.back, WinColor.GREEN, True), | |
AnsiBack.LIGHTYELLOW_EX: (winterm.back, WinColor.YELLOW, True), | |
AnsiBack.LIGHTBLUE_EX: (winterm.back, WinColor.BLUE, True), | |
AnsiBack.LIGHTMAGENTA_EX: (winterm.back, WinColor.MAGENTA, True), | |
AnsiBack.LIGHTCYAN_EX: (winterm.back, WinColor.CYAN, True), | |
AnsiBack.LIGHTWHITE_EX: (winterm.back, WinColor.GREY, True), | |
} | |
return dict() | |
def write(self, text): | |
if self.strip or self.convert: | |
self.write_and_convert(text) | |
else: | |
self.wrapped.write(text) | |
self.wrapped.flush() | |
if self.autoreset: | |
self.reset_all() | |
def reset_all(self): | |
if self.convert: | |
self.call_win32('m', (0,)) | |
elif not self.strip and not is_stream_closed(self.wrapped): | |
self.wrapped.write(Style.RESET_ALL) | |
def write_and_convert(self, text): | |
''' | |
Write the given text to our wrapped stream, stripping any ANSI | |
sequences from the text, and optionally converting them into win32 | |
calls. | |
''' | |
cursor = 0 | |
text = self.convert_osc(text) | |
for match in self.ANSI_CSI_RE.finditer(text): | |
start, end = match.span() | |
self.write_plain_text(text, cursor, start) | |
self.convert_ansi(*match.groups()) | |
cursor = end | |
self.write_plain_text(text, cursor, len(text)) | |
def write_plain_text(self, text, start, end): | |
if start < end: | |
self.wrapped.write(text[start:end]) | |
self.wrapped.flush() | |
def convert_ansi(self, paramstring, command): | |
if self.convert: | |
params = self.extract_params(command, paramstring) | |
self.call_win32(command, params) | |
def extract_params(self, command, paramstring): | |
if command in 'Hf': | |
params = tuple(int(p) if len(p) != 0 else 1 for p in paramstring.split(';')) | |
while len(params) < 2: | |
# defaults: | |
params = params + (1,) | |
else: | |
params = tuple(int(p) for p in paramstring.split(';') if len(p) != 0) | |
if len(params) == 0: | |
# defaults: | |
if command in 'JKm': | |
params = (0,) | |
elif command in 'ABCD': | |
params = (1,) | |
return params | |
def call_win32(self, command, params): | |
if command == 'm': | |
for param in params: | |
if param in self.win32_calls: | |
func_args = self.win32_calls[param] | |
func = func_args[0] | |
args = func_args[1:] | |
kwargs = dict(on_stderr=self.on_stderr) | |
func(*args, **kwargs) | |
elif command in 'J': | |
winterm.erase_screen(params[0], on_stderr=self.on_stderr) | |
elif command in 'K': | |
winterm.erase_line(params[0], on_stderr=self.on_stderr) | |
elif command in 'Hf': # cursor position - absolute | |
winterm.set_cursor_position(params, on_stderr=self.on_stderr) | |
elif command in 'ABCD': # cursor position - relative | |
n = params[0] | |
# A - up, B - down, C - forward, D - back | |
x, y = {'A': (0, -n), 'B': (0, n), 'C': (n, 0), 'D': (-n, 0)}[command] | |
winterm.cursor_adjust(x, y, on_stderr=self.on_stderr) | |
def convert_osc(self, text): | |
for match in self.ANSI_OSC_RE.finditer(text): | |
start, end = match.span() | |
text = text[:start] + text[end:] | |
paramstring, command = match.groups() | |
if command in '\x07': # \x07 = BEL | |
params = paramstring.split(";") | |
# 0 - change title and icon (we will only change title) | |
# 1 - change icon (we don't support this) | |
# 2 - change title | |
if params[0] in '02': | |
winterm.set_title(params[1]) | |
return text |
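# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# write()/write_and_convert() above strip ANSI CSI sequences from the text and,
# when conversion is enabled, replay them as win32 console calls through the
# win32_calls table. A minimal manual use of the wrapper (the flag defaults are
# resolved in the constructor, which sits above this excerpt) might look like:
def _example_ansitowin32():
    import sys
    wrapper = AnsiToWin32(sys.stdout, convert=None, strip=None, autoreset=False)
    # '\033[31m' selects a red foreground, '\033[0m' resets all attributes.
    wrapper.write('\033[31mred text\033[0m plain text\n')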
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. | |
import atexit | |
import contextlib | |
import sys | |
from .ansitowin32 import AnsiToWin32 | |
orig_stdout = None | |
orig_stderr = None | |
wrapped_stdout = None | |
wrapped_stderr = None | |
atexit_done = False | |
def reset_all(): | |
if AnsiToWin32 is not None: # Issue #74: objects might become None at exit | |
AnsiToWin32(orig_stdout).reset_all() | |
def init(autoreset=False, convert=None, strip=None, wrap=True): | |
if not wrap and any([autoreset, convert, strip]): | |
raise ValueError('wrap=False conflicts with any other arg=True') | |
global wrapped_stdout, wrapped_stderr | |
global orig_stdout, orig_stderr | |
orig_stdout = sys.stdout | |
orig_stderr = sys.stderr | |
if sys.stdout is None: | |
wrapped_stdout = None | |
else: | |
sys.stdout = wrapped_stdout = \ | |
wrap_stream(orig_stdout, convert, strip, autoreset, wrap) | |
if sys.stderr is None: | |
wrapped_stderr = None | |
else: | |
sys.stderr = wrapped_stderr = \ | |
wrap_stream(orig_stderr, convert, strip, autoreset, wrap) | |
global atexit_done | |
if not atexit_done: | |
atexit.register(reset_all) | |
atexit_done = True | |
def deinit(): | |
if orig_stdout is not None: | |
sys.stdout = orig_stdout | |
if orig_stderr is not None: | |
sys.stderr = orig_stderr | |
@contextlib.contextmanager | |
def colorama_text(*args, **kwargs): | |
init(*args, **kwargs) | |
try: | |
yield | |
finally: | |
deinit() | |
def reinit(): | |
if wrapped_stdout is not None: | |
sys.stdout = wrapped_stdout | |
if wrapped_stderr is not None: | |
sys.stderr = wrapped_stderr | |
def wrap_stream(stream, convert, strip, autoreset, wrap): | |
if wrap: | |
wrapper = AnsiToWin32(stream, | |
convert=convert, strip=strip, autoreset=autoreset) | |
if wrapper.should_wrap(): | |
stream = wrapper.stream | |
return stream | |
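# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# init() swaps sys.stdout/sys.stderr for wrapped streams, deinit() restores
# them, and colorama_text() does both around a block. Raw ANSI escapes are used
# here so the sketch only depends on names defined in this file:
def _example_init_usage():
    with colorama_text(autoreset=True):
        print('\033[32mgreen on Windows and ANSI terminals alike')
    # equivalent long form:
    init()
    print('\033[31mred\033[0m')
    deinit()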
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. | |
# from winbase.h | |
STDOUT = -11 | |
STDERR = -12 | |
try: | |
import ctypes | |
from ctypes import LibraryLoader | |
windll = LibraryLoader(ctypes.WinDLL) | |
from ctypes import wintypes | |
except (AttributeError, ImportError): | |
windll = None | |
SetConsoleTextAttribute = lambda *_: None | |
winapi_test = lambda *_: None | |
else: | |
from ctypes import byref, Structure, c_char, POINTER | |
COORD = wintypes._COORD | |
class CONSOLE_SCREEN_BUFFER_INFO(Structure): | |
"""struct in wincon.h.""" | |
_fields_ = [ | |
("dwSize", COORD), | |
("dwCursorPosition", COORD), | |
("wAttributes", wintypes.WORD), | |
("srWindow", wintypes.SMALL_RECT), | |
("dwMaximumWindowSize", COORD), | |
] | |
def __str__(self): | |
return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % ( | |
self.dwSize.Y, self.dwSize.X | |
, self.dwCursorPosition.Y, self.dwCursorPosition.X | |
, self.wAttributes | |
, self.srWindow.Top, self.srWindow.Left, self.srWindow.Bottom, self.srWindow.Right | |
, self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X | |
) | |
_GetStdHandle = windll.kernel32.GetStdHandle | |
_GetStdHandle.argtypes = [ | |
wintypes.DWORD, | |
] | |
_GetStdHandle.restype = wintypes.HANDLE | |
_GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo | |
_GetConsoleScreenBufferInfo.argtypes = [ | |
wintypes.HANDLE, | |
POINTER(CONSOLE_SCREEN_BUFFER_INFO), | |
] | |
_GetConsoleScreenBufferInfo.restype = wintypes.BOOL | |
_SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute | |
_SetConsoleTextAttribute.argtypes = [ | |
wintypes.HANDLE, | |
wintypes.WORD, | |
] | |
_SetConsoleTextAttribute.restype = wintypes.BOOL | |
_SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition | |
_SetConsoleCursorPosition.argtypes = [ | |
wintypes.HANDLE, | |
COORD, | |
] | |
_SetConsoleCursorPosition.restype = wintypes.BOOL | |
_FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA | |
_FillConsoleOutputCharacterA.argtypes = [ | |
wintypes.HANDLE, | |
c_char, | |
wintypes.DWORD, | |
COORD, | |
POINTER(wintypes.DWORD), | |
] | |
_FillConsoleOutputCharacterA.restype = wintypes.BOOL | |
_FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute | |
_FillConsoleOutputAttribute.argtypes = [ | |
wintypes.HANDLE, | |
wintypes.WORD, | |
wintypes.DWORD, | |
COORD, | |
POINTER(wintypes.DWORD), | |
] | |
_FillConsoleOutputAttribute.restype = wintypes.BOOL | |
_SetConsoleTitleW = windll.kernel32.SetConsoleTitleA | |
_SetConsoleTitleW.argtypes = [ | |
wintypes.LPCSTR | |
] | |
_SetConsoleTitleW.restype = wintypes.BOOL | |
handles = { | |
STDOUT: _GetStdHandle(STDOUT), | |
STDERR: _GetStdHandle(STDERR), | |
} | |
def winapi_test(): | |
handle = handles[STDOUT] | |
csbi = CONSOLE_SCREEN_BUFFER_INFO() | |
success = _GetConsoleScreenBufferInfo( | |
handle, byref(csbi)) | |
return bool(success) | |
def GetConsoleScreenBufferInfo(stream_id=STDOUT): | |
handle = handles[stream_id] | |
csbi = CONSOLE_SCREEN_BUFFER_INFO() | |
success = _GetConsoleScreenBufferInfo( | |
handle, byref(csbi)) | |
return csbi | |
def SetConsoleTextAttribute(stream_id, attrs): | |
handle = handles[stream_id] | |
return _SetConsoleTextAttribute(handle, attrs) | |
def SetConsoleCursorPosition(stream_id, position, adjust=True): | |
position = COORD(*position) | |
# If the position is out of range, do nothing. | |
if position.Y <= 0 or position.X <= 0: | |
return | |
# Adjust for Windows' SetConsoleCursorPosition: | |
# 1. being 0-based, while ANSI is 1-based. | |
# 2. expecting (x,y), while ANSI uses (y,x). | |
adjusted_position = COORD(position.Y - 1, position.X - 1) | |
if adjust: | |
# Adjust for viewport's scroll position | |
sr = GetConsoleScreenBufferInfo(STDOUT).srWindow | |
adjusted_position.Y += sr.Top | |
adjusted_position.X += sr.Left | |
# Resume normal processing | |
handle = handles[stream_id] | |
return _SetConsoleCursorPosition(handle, adjusted_position) | |
def FillConsoleOutputCharacter(stream_id, char, length, start): | |
handle = handles[stream_id] | |
char = c_char(char.encode()) | |
length = wintypes.DWORD(length) | |
num_written = wintypes.DWORD(0) | |
# Note that this is hard-coded for ANSI (vs wide) bytes. | |
success = _FillConsoleOutputCharacterA( | |
handle, char, length, start, byref(num_written)) | |
return num_written.value | |
def FillConsoleOutputAttribute(stream_id, attr, length, start): | |
''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )''' | |
handle = handles[stream_id] | |
attribute = wintypes.WORD(attr) | |
length = wintypes.DWORD(length) | |
num_written = wintypes.DWORD(0) | |
# Note that this is hard-coded for ANSI (vs wide) bytes. | |
return _FillConsoleOutputAttribute( | |
handle, attribute, length, start, byref(num_written)) | |
def SetConsoleTitle(title): | |
return _SetConsoleTitleW(title) |
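# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# These thin kernel32 wrappers only do real work on a Windows console;
# winapi_test() reports whether GetConsoleScreenBufferInfo succeeds for STDOUT,
# which is how the rest of colorama decides whether conversion is possible.
def _example_win32_usage():
    if winapi_test():
        csbi = GetConsoleScreenBufferInfo(STDOUT)
        print('cursor at column %d, row %d' %
              (csbi.dwCursorPosition.X, csbi.dwCursorPosition.Y))
    else:
        print('not attached to a Windows console; the wrappers are no-ops here')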
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. | |
from . import win32 | |
# from wincon.h | |
class WinColor(object): | |
BLACK = 0 | |
BLUE = 1 | |
GREEN = 2 | |
CYAN = 3 | |
RED = 4 | |
MAGENTA = 5 | |
YELLOW = 6 | |
GREY = 7 | |
# from wincon.h | |
class WinStyle(object): | |
NORMAL = 0x00 # dim text, dim background | |
BRIGHT = 0x08 # bright text, dim background | |
BRIGHT_BACKGROUND = 0x80 # dim text, bright background | |
class WinTerm(object): | |
def __init__(self): | |
self._default = win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes | |
self.set_attrs(self._default) | |
self._default_fore = self._fore | |
self._default_back = self._back | |
self._default_style = self._style | |
# In order to emulate LIGHT_EX in windows, we borrow the BRIGHT style. | |
# So that LIGHT_EX colors and BRIGHT style do not clobber each other, | |
# we track them separately, since LIGHT_EX is overwritten by Fore/Back | |
# and BRIGHT is overwritten by Style codes. | |
self._light = 0 | |
def get_attrs(self): | |
return self._fore + self._back * 16 + (self._style | self._light) | |
def set_attrs(self, value): | |
self._fore = value & 7 | |
self._back = (value >> 4) & 7 | |
self._style = value & (WinStyle.BRIGHT | WinStyle.BRIGHT_BACKGROUND) | |
def reset_all(self, on_stderr=None): | |
self.set_attrs(self._default) | |
self.set_console(attrs=self._default) | |
def fore(self, fore=None, light=False, on_stderr=False): | |
if fore is None: | |
fore = self._default_fore | |
self._fore = fore | |
# Emulate LIGHT_EX with BRIGHT Style | |
if light: | |
self._light |= WinStyle.BRIGHT | |
else: | |
self._light &= ~WinStyle.BRIGHT | |
self.set_console(on_stderr=on_stderr) | |
def back(self, back=None, light=False, on_stderr=False): | |
if back is None: | |
back = self._default_back | |
self._back = back | |
# Emulate LIGHT_EX with BRIGHT_BACKGROUND Style | |
if light: | |
self._light |= WinStyle.BRIGHT_BACKGROUND | |
else: | |
self._light &= ~WinStyle.BRIGHT_BACKGROUND | |
self.set_console(on_stderr=on_stderr) | |
def style(self, style=None, on_stderr=False): | |
if style is None: | |
style = self._default_style | |
self._style = style | |
self.set_console(on_stderr=on_stderr) | |
def set_console(self, attrs=None, on_stderr=False): | |
if attrs is None: | |
attrs = self.get_attrs() | |
handle = win32.STDOUT | |
if on_stderr: | |
handle = win32.STDERR | |
win32.SetConsoleTextAttribute(handle, attrs) | |
def get_position(self, handle): | |
position = win32.GetConsoleScreenBufferInfo(handle).dwCursorPosition | |
# Because Windows coordinates are 0-based, | |
# and win32.SetConsoleCursorPosition expects 1-based. | |
position.X += 1 | |
position.Y += 1 | |
return position | |
def set_cursor_position(self, position=None, on_stderr=False): | |
if position is None: | |
# I'm not currently tracking the position, so there is no default. | |
# position = self.get_position() | |
return | |
handle = win32.STDOUT | |
if on_stderr: | |
handle = win32.STDERR | |
win32.SetConsoleCursorPosition(handle, position) | |
def cursor_adjust(self, x, y, on_stderr=False): | |
handle = win32.STDOUT | |
if on_stderr: | |
handle = win32.STDERR | |
position = self.get_position(handle) | |
adjusted_position = (position.Y + y, position.X + x) | |
win32.SetConsoleCursorPosition(handle, adjusted_position, adjust=False) | |
def erase_screen(self, mode=0, on_stderr=False): | |
# 0 should clear from the cursor to the end of the screen. | |
# 1 should clear from the cursor to the beginning of the screen. | |
# 2 should clear the entire screen, and move cursor to (1,1) | |
handle = win32.STDOUT | |
if on_stderr: | |
handle = win32.STDERR | |
csbi = win32.GetConsoleScreenBufferInfo(handle) | |
# get the number of character cells in the current buffer | |
cells_in_screen = csbi.dwSize.X * csbi.dwSize.Y | |
# get number of character cells before current cursor position | |
cells_before_cursor = csbi.dwSize.X * csbi.dwCursorPosition.Y + csbi.dwCursorPosition.X | |
if mode == 0: | |
from_coord = csbi.dwCursorPosition | |
cells_to_erase = cells_in_screen - cells_before_cursor | |
if mode == 1: | |
from_coord = win32.COORD(0, 0) | |
cells_to_erase = cells_before_cursor | |
elif mode == 2: | |
from_coord = win32.COORD(0, 0) | |
cells_to_erase = cells_in_screen | |
# fill the entire screen with blanks | |
win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord) | |
# now set the buffer's attributes accordingly | |
win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord) | |
if mode == 2: | |
# put the cursor where needed | |
win32.SetConsoleCursorPosition(handle, (1, 1)) | |
def erase_line(self, mode=0, on_stderr=False): | |
# 0 should clear from the cursor to the end of the line. | |
# 1 should clear from the cursor to the beginning of the line. | |
# 2 should clear the entire line. | |
handle = win32.STDOUT | |
if on_stderr: | |
handle = win32.STDERR | |
csbi = win32.GetConsoleScreenBufferInfo(handle) | |
if mode == 0: | |
from_coord = csbi.dwCursorPosition | |
cells_to_erase = csbi.dwSize.X - csbi.dwCursorPosition.X | |
if mode == 1: | |
from_coord = win32.COORD(0, csbi.dwCursorPosition.Y) | |
cells_to_erase = csbi.dwCursorPosition.X | |
elif mode == 2: | |
from_coord = win32.COORD(0, csbi.dwCursorPosition.Y) | |
cells_to_erase = csbi.dwSize.X | |
# fill the entire screen with blanks | |
win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord) | |
# now set the buffer's attributes accordingly | |
win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord) | |
def set_title(self, title): | |
win32.SetConsoleTitle(title) |
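# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# get_attrs()/set_attrs() pack the console attribute WORD the way wincon.h
# does: bits 0-2 foreground, bits 4-6 background, with BRIGHT (0x08) and
# BRIGHT_BACKGROUND (0x80) kept as intensity flags. A worked example:
def _example_attribute_packing():
    # bright red text on a blue background
    attrs = WinColor.RED + WinColor.BLUE * 16 + WinStyle.BRIGHT
    assert attrs == 0x1C  # 4 + 1 * 16 + 8
    # unpacking, mirroring set_attrs():
    assert attrs & 7 == WinColor.RED
    assert (attrs >> 4) & 7 == WinColor.BLUE
    assert attrs & (WinStyle.BRIGHT | WinStyle.BRIGHT_BACKGROUND) == WinStyle.BRIGHT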
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2012-2016 Vinay Sajip. | |
# Licensed to the Python Software Foundation under a contributor agreement. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
import logging | |
__version__ = '0.2.4' | |
class DistlibException(Exception): | |
pass | |
try: | |
from logging import NullHandler | |
except ImportError: # pragma: no cover | |
class NullHandler(logging.Handler): | |
def handle(self, record): pass | |
def emit(self, record): pass | |
def createLock(self): self.lock = None | |
logger = logging.getLogger(__name__) | |
logger.addHandler(NullHandler()) |
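# --- Editor's note: illustrative sketch, not part of the vendored package. ---
# The NullHandler keeps "no handlers could be found" warnings away from
# applications that import distlib without configuring logging; an application
# that does want distlib's log output simply configures logging itself:
def _example_enable_distlib_logging():
    import logging
    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger('distlib').debug('distlib logging is now visible')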
"""Modules copied from Python 3 standard libraries, for internal use only. | |
Individual classes and functions are found in distlib._backport.misc. Intended | |
usage is to always import things missing from 3.1 from that module: the | |
built-in/stdlib objects will be used if found. | |
""" |
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2012 The Python Software Foundation. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
"""Backports for individual classes and functions.""" | |
import os | |
import sys | |
__all__ = ['cache_from_source', 'callable', 'fsencode'] | |
try: | |
from imp import cache_from_source | |
except ImportError: | |
def cache_from_source(py_file, debug=__debug__): | |
ext = debug and 'c' or 'o' | |
return py_file + ext | |
try: | |
callable = callable | |
except NameError: | |
from collections import Callable | |
def callable(obj): | |
return isinstance(obj, Callable) | |
try: | |
fsencode = os.fsencode | |
except AttributeError: | |
def fsencode(filename): | |
if isinstance(filename, bytes): | |
return filename | |
elif isinstance(filename, str): | |
return filename.encode(sys.getfilesystemencoding()) | |
else: | |
raise TypeError("expect bytes or str, not %s" % | |
type(filename).__name__) |
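# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# The fallbacks above are only used on interpreters that lack the real
# implementations; either way the call pattern is identical:
def _example_misc_backports():
    assert fsencode(b'already-bytes') == b'already-bytes'
    assert isinstance(fsencode('some/path'), bytes)
    assert cache_from_source('pkg/mod.py').endswith(('.pyc', '.pyo'))
    assert callable(len) and not callable(42)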
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2012 The Python Software Foundation. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
"""Utility functions for copying and archiving files and directory trees. | |
XXX The functions here don't copy the resource fork or other metadata on Mac. | |
""" | |
import os | |
import sys | |
import stat | |
from os.path import abspath | |
import fnmatch | |
import collections | |
import errno | |
from . import tarfile | |
try: | |
import bz2 | |
_BZ2_SUPPORTED = True | |
except ImportError: | |
_BZ2_SUPPORTED = False | |
try: | |
from pwd import getpwnam | |
except ImportError: | |
getpwnam = None | |
try: | |
from grp import getgrnam | |
except ImportError: | |
getgrnam = None | |
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2", | |
"copytree", "move", "rmtree", "Error", "SpecialFileError", | |
"ExecError", "make_archive", "get_archive_formats", | |
"register_archive_format", "unregister_archive_format", | |
"get_unpack_formats", "register_unpack_format", | |
"unregister_unpack_format", "unpack_archive", "ignore_patterns"] | |
class Error(EnvironmentError): | |
pass | |
class SpecialFileError(EnvironmentError): | |
"""Raised when trying to do a kind of operation (e.g. copying) which is | |
not supported on a special file (e.g. a named pipe)""" | |
class ExecError(EnvironmentError): | |
"""Raised when a command could not be executed""" | |
class ReadError(EnvironmentError): | |
"""Raised when an archive cannot be read""" | |
class RegistryError(Exception): | |
"""Raised when a registry operation with the archiving | |
and unpacking registries fails""" | |
try: | |
WindowsError | |
except NameError: | |
WindowsError = None | |
def copyfileobj(fsrc, fdst, length=16*1024): | |
"""copy data from file-like object fsrc to file-like object fdst""" | |
while 1: | |
buf = fsrc.read(length) | |
if not buf: | |
break | |
fdst.write(buf) | |
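# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# copyfileobj() just shuttles fixed-size chunks between two file-like objects,
# so it works on anything exposing read()/write(), not only real files:
def _example_copyfileobj():
    import io
    src, dst = io.BytesIO(b'x' * 100000), io.BytesIO()
    copyfileobj(src, dst, length=16 * 1024)  # copy in 16 KiB chunks
    assert dst.getvalue() == b'x' * 100000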
def _samefile(src, dst): | |
# Macintosh, Unix. | |
if hasattr(os.path, 'samefile'): | |
try: | |
return os.path.samefile(src, dst) | |
except OSError: | |
return False | |
# All other platforms: check for same pathname. | |
return (os.path.normcase(os.path.abspath(src)) == | |
os.path.normcase(os.path.abspath(dst))) | |
def copyfile(src, dst): | |
"""Copy data from src to dst""" | |
if _samefile(src, dst): | |
raise Error("`%s` and `%s` are the same file" % (src, dst)) | |
for fn in [src, dst]: | |
try: | |
st = os.stat(fn) | |
except OSError: | |
# File most likely does not exist | |
pass | |
else: | |
# XXX What about other special files? (sockets, devices...) | |
if stat.S_ISFIFO(st.st_mode): | |
raise SpecialFileError("`%s` is a named pipe" % fn) | |
with open(src, 'rb') as fsrc: | |
with open(dst, 'wb') as fdst: | |
copyfileobj(fsrc, fdst) | |
def copymode(src, dst): | |
"""Copy mode bits from src to dst""" | |
if hasattr(os, 'chmod'): | |
st = os.stat(src) | |
mode = stat.S_IMODE(st.st_mode) | |
os.chmod(dst, mode) | |
def copystat(src, dst): | |
"""Copy all stat info (mode bits, atime, mtime, flags) from src to dst""" | |
st = os.stat(src) | |
mode = stat.S_IMODE(st.st_mode) | |
if hasattr(os, 'utime'): | |
os.utime(dst, (st.st_atime, st.st_mtime)) | |
if hasattr(os, 'chmod'): | |
os.chmod(dst, mode) | |
if hasattr(os, 'chflags') and hasattr(st, 'st_flags'): | |
try: | |
os.chflags(dst, st.st_flags) | |
except OSError as why: | |
if (not hasattr(errno, 'EOPNOTSUPP') or | |
why.errno != errno.EOPNOTSUPP): | |
raise | |
def copy(src, dst): | |
"""Copy data and mode bits ("cp src dst"). | |
The destination may be a directory. | |
""" | |
if os.path.isdir(dst): | |
dst = os.path.join(dst, os.path.basename(src)) | |
copyfile(src, dst) | |
copymode(src, dst) | |
def copy2(src, dst): | |
"""Copy data and all stat info ("cp -p src dst"). | |
The destination may be a directory. | |
""" | |
if os.path.isdir(dst): | |
dst = os.path.join(dst, os.path.basename(src)) | |
copyfile(src, dst) | |
copystat(src, dst) | |
def ignore_patterns(*patterns): | |
"""Function that can be used as copytree() ignore parameter. | |
Patterns is a sequence of glob-style patterns | |
that are used to exclude files""" | |
def _ignore_patterns(path, names): | |
ignored_names = [] | |
for pattern in patterns: | |
ignored_names.extend(fnmatch.filter(names, pattern)) | |
return set(ignored_names) | |
return _ignore_patterns | |
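# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# The callable returned here plugs straight into the `ignore` hook of
# copytree(), defined just below. The paths used are hypothetical:
def _example_copytree_with_ignore():
    copytree('project/src', 'project/build/src',  # destination must not exist
             ignore=ignore_patterns('*.pyc', 'tmp*', '.git'))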
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, | |
ignore_dangling_symlinks=False): | |
"""Recursively copy a directory tree. | |
The destination directory must not already exist. | |
If exception(s) occur, an Error is raised with a list of reasons. | |
If the optional symlinks flag is true, symbolic links in the | |
source tree result in symbolic links in the destination tree; if | |
it is false, the contents of the files pointed to by symbolic | |
links are copied. If the file pointed by the symlink doesn't | |
exist, an exception will be added in the list of errors raised in | |
an Error exception at the end of the copy process. | |
You can set the optional ignore_dangling_symlinks flag to true if you | |
want to silence this exception. Notice that this has no effect on | |
platforms that don't support os.symlink. | |
The optional ignore argument is a callable. If given, it | |
is called with the `src` parameter, which is the directory | |
being visited by copytree(), and `names` which is the list of | |
`src` contents, as returned by os.listdir(): | |
callable(src, names) -> ignored_names | |
Since copytree() is called recursively, the callable will be | |
called once for each directory that is copied. It returns a | |
list of names relative to the `src` directory that should | |
not be copied. | |
The optional copy_function argument is a callable that will be used | |
to copy each file. It will be called with the source path and the | |
destination path as arguments. By default, copy2() is used, but any | |
function that supports the same signature (like copy()) can be used. | |
""" | |
names = os.listdir(src) | |
if ignore is not None: | |
ignored_names = ignore(src, names) | |
else: | |
ignored_names = set() | |
os.makedirs(dst) | |
errors = [] | |
for name in names: | |
if name in ignored_names: | |
continue | |
srcname = os.path.join(src, name) | |
dstname = os.path.join(dst, name) | |
try: | |
if os.path.islink(srcname): | |
linkto = os.readlink(srcname) | |
if symlinks: | |
os.symlink(linkto, dstname) | |
else: | |
# ignore dangling symlink if the flag is on | |
if not os.path.exists(linkto) and ignore_dangling_symlinks: | |
continue | |
# otherwise let the copy occur; copy2 will raise an error | |
copy_function(srcname, dstname) | |
elif os.path.isdir(srcname): | |
copytree(srcname, dstname, symlinks, ignore, copy_function) | |
else: | |
# Will raise a SpecialFileError for unsupported file types | |
copy_function(srcname, dstname) | |
# catch the Error from the recursive copytree so that we can | |
# continue with other files | |
except Error as err: | |
errors.extend(err.args[0]) | |
except EnvironmentError as why: | |
errors.append((srcname, dstname, str(why))) | |
try: | |
copystat(src, dst) | |
except OSError as why: | |
if WindowsError is not None and isinstance(why, WindowsError): | |
# Copying file access times may fail on Windows | |
pass | |
else: | |
errors.extend((src, dst, str(why))) | |
if errors: | |
raise Error(errors) | |
def rmtree(path, ignore_errors=False, onerror=None): | |
"""Recursively delete a directory tree. | |
If ignore_errors is set, errors are ignored; otherwise, if onerror | |
is set, it is called to handle the error with arguments (func, | |
path, exc_info) where func is os.listdir, os.remove, or os.rmdir; | |
path is the argument to that function that caused it to fail; and | |
exc_info is a tuple returned by sys.exc_info(). If ignore_errors | |
is false and onerror is None, an exception is raised. | |
""" | |
if ignore_errors: | |
def onerror(*args): | |
pass | |
elif onerror is None: | |
def onerror(*args): | |
raise | |
try: | |
if os.path.islink(path): | |
# symlinks to directories are forbidden, see bug #1669 | |
raise OSError("Cannot call rmtree on a symbolic link") | |
except OSError: | |
onerror(os.path.islink, path, sys.exc_info()) | |
# can't continue even if onerror hook returns | |
return | |
names = [] | |
try: | |
names = os.listdir(path) | |
except os.error: | |
onerror(os.listdir, path, sys.exc_info()) | |
for name in names: | |
fullname = os.path.join(path, name) | |
try: | |
mode = os.lstat(fullname).st_mode | |
except os.error: | |
mode = 0 | |
if stat.S_ISDIR(mode): | |
rmtree(fullname, ignore_errors, onerror) | |
else: | |
try: | |
os.remove(fullname) | |
except os.error: | |
onerror(os.remove, fullname, sys.exc_info()) | |
try: | |
os.rmdir(path) | |
except os.error: | |
onerror(os.rmdir, path, sys.exc_info()) | |
def _basename(path): | |
# A basename() variant which first strips the trailing slash, if present. | |
# Thus we always get the last component of the path, even for directories. | |
return os.path.basename(path.rstrip(os.path.sep)) | |
def move(src, dst): | |
"""Recursively move a file or directory to another location. This is | |
similar to the Unix "mv" command. | |
If the destination is a directory or a symlink to a directory, the source | |
is moved inside the directory. The destination path must not already | |
exist. | |
If the destination already exists but is not a directory, it may be | |
overwritten depending on os.rename() semantics. | |
If the destination is on our current filesystem, then rename() is used. | |
Otherwise, src is copied to the destination and then removed. | |
A lot more could be done here... A look at a mv.c shows a lot of | |
the issues this implementation glosses over. | |
""" | |
real_dst = dst | |
if os.path.isdir(dst): | |
if _samefile(src, dst): | |
# We might be on a case insensitive filesystem, | |
# perform the rename anyway. | |
os.rename(src, dst) | |
return | |
real_dst = os.path.join(dst, _basename(src)) | |
if os.path.exists(real_dst): | |
raise Error("Destination path '%s' already exists" % real_dst) | |
try: | |
os.rename(src, real_dst) | |
except OSError: | |
if os.path.isdir(src): | |
if _destinsrc(src, dst): | |
raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst)) | |
copytree(src, real_dst, symlinks=True) | |
rmtree(src) | |
else: | |
copy2(src, real_dst) | |
os.unlink(src) | |
def _destinsrc(src, dst): | |
src = abspath(src) | |
dst = abspath(dst) | |
if not src.endswith(os.path.sep): | |
src += os.path.sep | |
if not dst.endswith(os.path.sep): | |
dst += os.path.sep | |
return dst.startswith(src) | |
def _get_gid(name): | |
"""Returns a gid, given a group name.""" | |
if getgrnam is None or name is None: | |
return None | |
try: | |
result = getgrnam(name) | |
except KeyError: | |
result = None | |
if result is not None: | |
return result[2] | |
return None | |
def _get_uid(name): | |
"""Returns an uid, given a user name.""" | |
if getpwnam is None or name is None: | |
return None | |
try: | |
result = getpwnam(name) | |
except KeyError: | |
result = None | |
if result is not None: | |
return result[2] | |
return None | |
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, | |
owner=None, group=None, logger=None): | |
"""Create a (possibly compressed) tar file from all the files under | |
'base_dir'. | |
'compress' must be "gzip" (the default), "bzip2", or None. | |
'owner' and 'group' can be used to define an owner and a group for the | |
archive that is being built. If not provided, the current owner and group | |
will be used. | |
The output tar file will be named 'base_name' + ".tar", possibly plus | |
the appropriate compression extension (".gz", or ".bz2"). | |
Returns the output filename. | |
""" | |
tar_compression = {'gzip': 'gz', None: ''} | |
compress_ext = {'gzip': '.gz'} | |
if _BZ2_SUPPORTED: | |
tar_compression['bzip2'] = 'bz2' | |
compress_ext['bzip2'] = '.bz2' | |
# flags for compression program, each element of list will be an argument | |
if compress is not None and compress not in compress_ext: | |
raise ValueError("bad value for 'compress', or compression format not " | |
"supported : {0}".format(compress)) | |
archive_name = base_name + '.tar' + compress_ext.get(compress, '') | |
archive_dir = os.path.dirname(archive_name) | |
if not os.path.exists(archive_dir): | |
if logger is not None: | |
logger.info("creating %s", archive_dir) | |
if not dry_run: | |
os.makedirs(archive_dir) | |
# creating the tarball | |
if logger is not None: | |
logger.info('Creating tar archive') | |
uid = _get_uid(owner) | |
gid = _get_gid(group) | |
def _set_uid_gid(tarinfo): | |
if gid is not None: | |
tarinfo.gid = gid | |
tarinfo.gname = group | |
if uid is not None: | |
tarinfo.uid = uid | |
tarinfo.uname = owner | |
return tarinfo | |
if not dry_run: | |
tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) | |
try: | |
tar.add(base_dir, filter=_set_uid_gid) | |
finally: | |
tar.close() | |
return archive_name | |
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False): | |
# XXX see if we want to keep an external call here | |
if verbose: | |
zipoptions = "-r" | |
else: | |
zipoptions = "-rq" | |
from distutils.errors import DistutilsExecError | |
from distutils.spawn import spawn | |
try: | |
spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run) | |
except DistutilsExecError: | |
# XXX really should distinguish between "couldn't find | |
# external 'zip' command" and "zip failed". | |
raise ExecError("unable to create zip file '%s': " | |
"could neither import the 'zipfile' module nor " | |
"find a standalone zip utility") % zip_filename | |
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None): | |
"""Create a zip file from all the files under 'base_dir'. | |
The output zip file will be named 'base_name' + ".zip". Uses either the | |
"zipfile" Python module (if available) or the InfoZIP "zip" utility | |
(if installed and found on the default search path). If neither tool is | |
available, raises ExecError. Returns the name of the output zip | |
file. | |
""" | |
zip_filename = base_name + ".zip" | |
archive_dir = os.path.dirname(base_name) | |
if not os.path.exists(archive_dir): | |
if logger is not None: | |
logger.info("creating %s", archive_dir) | |
if not dry_run: | |
os.makedirs(archive_dir) | |
# If zipfile module is not available, try spawning an external 'zip' | |
# command. | |
try: | |
import zipfile | |
except ImportError: | |
zipfile = None | |
if zipfile is None: | |
_call_external_zip(base_dir, zip_filename, verbose, dry_run) | |
else: | |
if logger is not None: | |
logger.info("creating '%s' and adding '%s' to it", | |
zip_filename, base_dir) | |
if not dry_run: | |
zip = zipfile.ZipFile(zip_filename, "w", | |
compression=zipfile.ZIP_DEFLATED) | |
for dirpath, dirnames, filenames in os.walk(base_dir): | |
for name in filenames: | |
path = os.path.normpath(os.path.join(dirpath, name)) | |
if os.path.isfile(path): | |
zip.write(path, path) | |
if logger is not None: | |
logger.info("adding '%s'", path) | |
zip.close() | |
return zip_filename | |
_ARCHIVE_FORMATS = { | |
'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), | |
'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"), | |
'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"), | |
'zip': (_make_zipfile, [], "ZIP file"), | |
} | |
if _BZ2_SUPPORTED: | |
_ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')], | |
"bzip2'ed tar-file") | |
def get_archive_formats(): | |
"""Returns a list of supported formats for archiving and unarchiving. | |
Each element of the returned sequence is a tuple (name, description) | |
""" | |
formats = [(name, registry[2]) for name, registry in | |
_ARCHIVE_FORMATS.items()] | |
formats.sort() | |
return formats | |
def register_archive_format(name, function, extra_args=None, description=''): | |
"""Registers an archive format. | |
name is the name of the format. function is the callable that will be | |
used to create archives. If provided, extra_args is a sequence of | |
(name, value) tuples that will be passed as arguments to the callable. | |
description can be provided to describe the format, and will be returned | |
by the get_archive_formats() function. | |
""" | |
if extra_args is None: | |
extra_args = [] | |
if not isinstance(function, collections.Callable): | |
raise TypeError('The %s object is not callable' % function) | |
if not isinstance(extra_args, (tuple, list)): | |
raise TypeError('extra_args needs to be a sequence') | |
for element in extra_args: | |
if not isinstance(element, (tuple, list)) or len(element) !=2: | |
raise TypeError('extra_args elements are : (arg_name, value)') | |
_ARCHIVE_FORMATS[name] = (function, extra_args, description) | |
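# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# A registered function receives (base_name, base_dir) plus any extra_args and
# the dry_run/logger (and owner/group) keywords that make_archive() forwards.
# The 'xztar' name and _make_xz_tarball helper below are hypothetical:
def _example_register_archive_format():
    def _make_xz_tarball(base_name, base_dir, **kwargs):
        ...  # would create base_name + '.tar.xz' from base_dir and return its name
    register_archive_format('xztar', _make_xz_tarball,
                            description="xz'ed tar-file")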
def unregister_archive_format(name): | |
del _ARCHIVE_FORMATS[name] | |
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, | |
dry_run=0, owner=None, group=None, logger=None): | |
"""Create an archive file (eg. zip or tar). | |
'base_name' is the name of the file to create, minus any format-specific | |
extension; 'format' is the archive format: one of "zip", "tar", "bztar" | |
or "gztar". | |
'root_dir' is a directory that will be the root directory of the | |
archive; ie. we typically chdir into 'root_dir' before creating the | |
archive. 'base_dir' is the directory where we start archiving from; | |
ie. 'base_dir' will be the common prefix of all files and | |
directories in the archive. 'root_dir' and 'base_dir' both default | |
to the current directory. Returns the name of the archive file. | |
'owner' and 'group' are used when creating a tar archive. By default, | |
uses the current owner and group. | |
""" | |
save_cwd = os.getcwd() | |
if root_dir is not None: | |
if logger is not None: | |
logger.debug("changing into '%s'", root_dir) | |
base_name = os.path.abspath(base_name) | |
if not dry_run: | |
os.chdir(root_dir) | |
if base_dir is None: | |
base_dir = os.curdir | |
kwargs = {'dry_run': dry_run, 'logger': logger} | |
try: | |
format_info = _ARCHIVE_FORMATS[format] | |
except KeyError: | |
raise ValueError("unknown archive format '%s'" % format) | |
func = format_info[0] | |
for arg, val in format_info[1]: | |
kwargs[arg] = val | |
if format != 'zip': | |
kwargs['owner'] = owner | |
kwargs['group'] = group | |
try: | |
filename = func(base_name, base_dir, **kwargs) | |
finally: | |
if root_dir is not None: | |
if logger is not None: | |
logger.debug("changing back to '%s'", save_cwd) | |
os.chdir(save_cwd) | |
return filename | |
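# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# For instance, packing ./project/src into /tmp/src-release.tar.gz while only
# storing paths relative to ./project (all paths here are hypothetical):
def _example_make_archive():
    name = make_archive('/tmp/src-release', 'gztar',
                        root_dir='project', base_dir='src')
    # name == '/tmp/src-release.tar.gz'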
def get_unpack_formats(): | |
"""Returns a list of supported formats for unpacking. | |
Each element of the returned sequence is a tuple | |
(name, extensions, description) | |
""" | |
formats = [(name, info[0], info[3]) for name, info in | |
_UNPACK_FORMATS.items()] | |
formats.sort() | |
return formats | |
def _check_unpack_options(extensions, function, extra_args): | |
"""Checks what gets registered as an unpacker.""" | |
# first make sure no other unpacker is registered for this extension | |
existing_extensions = {} | |
for name, info in _UNPACK_FORMATS.items(): | |
for ext in info[0]: | |
existing_extensions[ext] = name | |
for extension in extensions: | |
if extension in existing_extensions: | |
msg = '%s is already registered for "%s"' | |
raise RegistryError(msg % (extension, | |
existing_extensions[extension])) | |
if not isinstance(function, collections.Callable): | |
raise TypeError('The registered function must be a callable') | |
def register_unpack_format(name, extensions, function, extra_args=None, | |
description=''): | |
"""Registers an unpack format. | |
`name` is the name of the format. `extensions` is a list of extensions | |
corresponding to the format. | |
`function` is the callable that will be | |
used to unpack archives. The callable will receive archives to unpack. | |
If it's unable to handle an archive, it needs to raise a ReadError | |
exception. | |
If provided, `extra_args` is a sequence of | |
(name, value) tuples that will be passed as arguments to the callable. | |
description can be provided to describe the format, and will be returned | |
by the get_unpack_formats() function. | |
""" | |
if extra_args is None: | |
extra_args = [] | |
_check_unpack_options(extensions, function, extra_args) | |
_UNPACK_FORMATS[name] = extensions, function, extra_args, description | |
def unregister_unpack_format(name): | |
"""Removes the pack format from the registry.""" | |
del _UNPACK_FORMATS[name] | |
def _ensure_directory(path): | |
"""Ensure that the parent directory of `path` exists""" | |
dirname = os.path.dirname(path) | |
if not os.path.isdir(dirname): | |
os.makedirs(dirname) | |
def _unpack_zipfile(filename, extract_dir): | |
"""Unpack zip `filename` to `extract_dir` | |
""" | |
try: | |
import zipfile | |
except ImportError: | |
raise ReadError('zlib not supported, cannot unpack this archive.') | |
if not zipfile.is_zipfile(filename): | |
raise ReadError("%s is not a zip file" % filename) | |
zip = zipfile.ZipFile(filename) | |
try: | |
for info in zip.infolist(): | |
name = info.filename | |
# don't extract absolute paths or ones with .. in them | |
if name.startswith('/') or '..' in name: | |
continue | |
target = os.path.join(extract_dir, *name.split('/')) | |
if not target: | |
continue | |
_ensure_directory(target) | |
if not name.endswith('/'): | |
# file | |
data = zip.read(info.filename) | |
f = open(target, 'wb') | |
try: | |
f.write(data) | |
finally: | |
f.close() | |
del data | |
finally: | |
zip.close() | |
def _unpack_tarfile(filename, extract_dir): | |
"""Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir` | |
""" | |
try: | |
tarobj = tarfile.open(filename) | |
except tarfile.TarError: | |
raise ReadError( | |
"%s is not a compressed or uncompressed tar file" % filename) | |
try: | |
tarobj.extractall(extract_dir) | |
finally: | |
tarobj.close() | |
_UNPACK_FORMATS = { | |
'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"), | |
'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"), | |
'zip': (['.zip'], _unpack_zipfile, [], "ZIP file") | |
} | |
if _BZ2_SUPPORTED: | |
_UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [], | |
"bzip2'ed tar-file") | |
def _find_unpack_format(filename): | |
for name, info in _UNPACK_FORMATS.items(): | |
for extension in info[0]: | |
if filename.endswith(extension): | |
return name | |
return None | |
def unpack_archive(filename, extract_dir=None, format=None): | |
"""Unpack an archive. | |
`filename` is the name of the archive. | |
`extract_dir` is the name of the target directory, where the archive | |
is unpacked. If not provided, the current working directory is used. | |
`format` is the archive format: one of "zip", "tar", or "gztar". Or any | |
other registered format. If not provided, unpack_archive will use the | |
filename extension and see if an unpacker was registered for that | |
extension. | |
If no unpacker is registered for the extension, a ReadError is raised; | |
an unrecognized explicit `format` raises ValueError. | |
""" | |
if extract_dir is None: | |
extract_dir = os.getcwd() | |
if format is not None: | |
try: | |
format_info = _UNPACK_FORMATS[format] | |
except KeyError: | |
raise ValueError("Unknown unpack format '{0}'".format(format)) | |
func = format_info[1] | |
func(filename, extract_dir, **dict(format_info[2])) | |
else: | |
# we need to look at the registered unpackers supported extensions | |
format = _find_unpack_format(filename) | |
if format is None: | |
raise ReadError("Unknown archive format '{0}'".format(filename)) | |
func = _UNPACK_FORMATS[format][1] | |
kwargs = dict(_UNPACK_FORMATS[format][2]) | |
func(filename, extract_dir, **kwargs) |
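# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# With format=None the filename extension picks the registered unpacker
# (.zip -> _unpack_zipfile, .tar.gz/.tgz -> _unpack_tarfile, ...); an explicit
# format= skips that lookup. The paths used are hypothetical:
def _example_unpack_archive():
    unpack_archive('downloads/src-release.tar.gz', extract_dir='build/src')
    unpack_archive('downloads/bundle.dat', extract_dir='build/bundle',
                   format='zip')  # trust the caller, ignore the odd extension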
[posix_prefix] | |
# Configuration directories. Some of these come straight out of the | |
# configure script. They are for implementing the other variables, not to | |
# be used directly in [resource_locations]. | |
confdir = /etc | |
datadir = /usr/share | |
libdir = /usr/lib | |
statedir = /var | |
# User resource directory | |
local = ~/.local/{distribution.name} | |
stdlib = {base}/lib/python{py_version_short} | |
platstdlib = {platbase}/lib/python{py_version_short} | |
purelib = {base}/lib/python{py_version_short}/site-packages | |
platlib = {platbase}/lib/python{py_version_short}/site-packages | |
include = {base}/include/python{py_version_short}{abiflags} | |
platinclude = {platbase}/include/python{py_version_short}{abiflags} | |
data = {base} | |
[posix_home] | |
stdlib = {base}/lib/python | |
platstdlib = {base}/lib/python | |
purelib = {base}/lib/python | |
platlib = {base}/lib/python | |
include = {base}/include/python | |
platinclude = {base}/include/python | |
scripts = {base}/bin | |
data = {base} | |
[nt] | |
stdlib = {base}/Lib | |
platstdlib = {base}/Lib | |
purelib = {base}/Lib/site-packages | |
platlib = {base}/Lib/site-packages | |
include = {base}/Include | |
platinclude = {base}/Include | |
scripts = {base}/Scripts | |
data = {base} | |
[os2] | |
stdlib = {base}/Lib | |
platstdlib = {base}/Lib | |
purelib = {base}/Lib/site-packages | |
platlib = {base}/Lib/site-packages | |
include = {base}/Include | |
platinclude = {base}/Include | |
scripts = {base}/Scripts | |
data = {base} | |
[os2_home] | |
stdlib = {userbase}/lib/python{py_version_short} | |
platstdlib = {userbase}/lib/python{py_version_short} | |
purelib = {userbase}/lib/python{py_version_short}/site-packages | |
platlib = {userbase}/lib/python{py_version_short}/site-packages | |
include = {userbase}/include/python{py_version_short} | |
scripts = {userbase}/bin | |
data = {userbase} | |
[nt_user] | |
stdlib = {userbase}/Python{py_version_nodot} | |
platstdlib = {userbase}/Python{py_version_nodot} | |
purelib = {userbase}/Python{py_version_nodot}/site-packages | |
platlib = {userbase}/Python{py_version_nodot}/site-packages | |
include = {userbase}/Python{py_version_nodot}/Include | |
scripts = {userbase}/Scripts | |
data = {userbase} | |
[posix_user] | |
stdlib = {userbase}/lib/python{py_version_short} | |
platstdlib = {userbase}/lib/python{py_version_short} | |
purelib = {userbase}/lib/python{py_version_short}/site-packages | |
platlib = {userbase}/lib/python{py_version_short}/site-packages | |
include = {userbase}/include/python{py_version_short} | |
scripts = {userbase}/bin | |
data = {userbase} | |
[osx_framework_user] | |
stdlib = {userbase}/lib/python | |
platstdlib = {userbase}/lib/python | |
purelib = {userbase}/lib/python/site-packages | |
platlib = {userbase}/lib/python/site-packages | |
include = {userbase}/include | |
scripts = {userbase}/bin | |
data = {userbase} |
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2012 The Python Software Foundation. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
"""Access to Python's configuration information.""" | |
import codecs | |
import os | |
import re | |
import sys | |
from os.path import pardir, realpath | |
try: | |
import configparser | |
except ImportError: | |
import ConfigParser as configparser | |
__all__ = [ | |
'get_config_h_filename', | |
'get_config_var', | |
'get_config_vars', | |
'get_makefile_filename', | |
'get_path', | |
'get_path_names', | |
'get_paths', | |
'get_platform', | |
'get_python_version', | |
'get_scheme_names', | |
'parse_config_h', | |
] | |
def _safe_realpath(path): | |
try: | |
return realpath(path) | |
except OSError: | |
return path | |
if sys.executable: | |
_PROJECT_BASE = os.path.dirname(_safe_realpath(sys.executable)) | |
else: | |
# sys.executable can be empty if argv[0] has been changed and Python is | |
# unable to retrieve the real program name | |
_PROJECT_BASE = _safe_realpath(os.getcwd()) | |
if os.name == "nt" and "pcbuild" in _PROJECT_BASE[-8:].lower(): | |
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir)) | |
# PC/VS7.1 | |
if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower(): | |
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) | |
# PC/AMD64 | |
if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower(): | |
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) | |
def is_python_build(): | |
for fn in ("Setup.dist", "Setup.local"): | |
if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)): | |
return True | |
return False | |
_PYTHON_BUILD = is_python_build() | |
_cfg_read = False | |
def _ensure_cfg_read(): | |
global _cfg_read | |
if not _cfg_read: | |
from ..resources import finder | |
backport_package = __name__.rsplit('.', 1)[0] | |
_finder = finder(backport_package) | |
_cfgfile = _finder.find('sysconfig.cfg') | |
assert _cfgfile, 'sysconfig.cfg exists' | |
with _cfgfile.as_stream() as s: | |
_SCHEMES.readfp(s) | |
if _PYTHON_BUILD: | |
for scheme in ('posix_prefix', 'posix_home'): | |
_SCHEMES.set(scheme, 'include', '{srcdir}/Include') | |
_SCHEMES.set(scheme, 'platinclude', '{projectbase}/.') | |
_cfg_read = True | |
_SCHEMES = configparser.RawConfigParser() | |
_VAR_REPL = re.compile(r'\{([^{]*?)\}') | |
def _expand_globals(config): | |
_ensure_cfg_read() | |
if config.has_section('globals'): | |
globals = config.items('globals') | |
else: | |
globals = tuple() | |
sections = config.sections() | |
for section in sections: | |
if section == 'globals': | |
continue | |
for option, value in globals: | |
if config.has_option(section, option): | |
continue | |
config.set(section, option, value) | |
config.remove_section('globals') | |
# now expanding local variables defined in the cfg file | |
# | |
for section in config.sections(): | |
variables = dict(config.items(section)) | |
def _replacer(matchobj): | |
name = matchobj.group(1) | |
if name in variables: | |
return variables[name] | |
return matchobj.group(0) | |
for option, value in config.items(section): | |
config.set(section, option, _VAR_REPL.sub(_replacer, value)) | |
#_expand_globals(_SCHEMES) | |
# FIXME don't rely on sys.version here, its format is an implementation detail | |
# of CPython, use sys.version_info or sys.hexversion | |
_PY_VERSION = sys.version.split()[0] | |
_PY_VERSION_SHORT = sys.version[:3] | |
_PY_VERSION_SHORT_NO_DOT = _PY_VERSION[0] + _PY_VERSION[2] | |
_PREFIX = os.path.normpath(sys.prefix) | |
_EXEC_PREFIX = os.path.normpath(sys.exec_prefix) | |
_CONFIG_VARS = None | |
_USER_BASE = None | |
def _subst_vars(path, local_vars): | |
"""In the string `path`, replace tokens like {some.thing} with the | |
corresponding value from the map `local_vars`. | |
If there is no corresponding value, leave the token unchanged. | |
""" | |
def _replacer(matchobj): | |
name = matchobj.group(1) | |
if name in local_vars: | |
return local_vars[name] | |
elif name in os.environ: | |
return os.environ[name] | |
return matchobj.group(0) | |
return _VAR_REPL.sub(_replacer, path) | |
def _extend_dict(target_dict, other_dict): | |
target_keys = target_dict.keys() | |
for key, value in other_dict.items(): | |
if key in target_keys: | |
continue | |
target_dict[key] = value | |
def _expand_vars(scheme, vars): | |
res = {} | |
if vars is None: | |
vars = {} | |
_extend_dict(vars, get_config_vars()) | |
for key, value in _SCHEMES.items(scheme): | |
if os.name in ('posix', 'nt'): | |
value = os.path.expanduser(value) | |
res[key] = os.path.normpath(_subst_vars(value, vars)) | |
return res | |
def format_value(value, vars): | |
def _replacer(matchobj): | |
name = matchobj.group(1) | |
if name in vars: | |
return vars[name] | |
return matchobj.group(0) | |
return _VAR_REPL.sub(_replacer, value) | |
def _get_default_scheme(): | |
if os.name == 'posix': | |
# the default scheme for posix is posix_prefix | |
return 'posix_prefix' | |
return os.name | |
def _getuserbase(): | |
env_base = os.environ.get("PYTHONUSERBASE", None) | |
def joinuser(*args): | |
return os.path.expanduser(os.path.join(*args)) | |
# what about 'os2emx', 'riscos' ? | |
if os.name == "nt": | |
base = os.environ.get("APPDATA") or "~" | |
if env_base: | |
return env_base | |
else: | |
return joinuser(base, "Python") | |
if sys.platform == "darwin": | |
framework = get_config_var("PYTHONFRAMEWORK") | |
if framework: | |
if env_base: | |
return env_base | |
else: | |
return joinuser("~", "Library", framework, "%d.%d" % | |
sys.version_info[:2]) | |
if env_base: | |
return env_base | |
else: | |
return joinuser("~", ".local") | |
def _parse_makefile(filename, vars=None): | |
"""Parse a Makefile-style file. | |
A dictionary containing name/value pairs is returned. If an | |
optional dictionary is passed in as the second argument, it is | |
used instead of a new dictionary. | |
""" | |
# Regexes needed for parsing Makefile (and similar syntaxes, | |
# like old-style Setup files). | |
_variable_rx = re.compile(r"([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)") | |
_findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)") | |
_findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}") | |
if vars is None: | |
vars = {} | |
done = {} | |
notdone = {} | |
with codecs.open(filename, encoding='utf-8', errors="surrogateescape") as f: | |
lines = f.readlines() | |
for line in lines: | |
if line.startswith('#') or line.strip() == '': | |
continue | |
m = _variable_rx.match(line) | |
if m: | |
n, v = m.group(1, 2) | |
v = v.strip() | |
# `$$' is a literal `$' in make | |
tmpv = v.replace('$$', '') | |
if "$" in tmpv: | |
notdone[n] = v | |
else: | |
try: | |
v = int(v) | |
except ValueError: | |
# insert literal `$' | |
done[n] = v.replace('$$', '$') | |
else: | |
done[n] = v | |
# do variable interpolation here | |
variables = list(notdone.keys()) | |
# Variables with a 'PY_' prefix in the makefile. These need to | |
# be made available without that prefix through sysconfig. | |
# Special care is needed to ensure that variable expansion works, even | |
# if the expansion uses the name without a prefix. | |
renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS') | |
while len(variables) > 0: | |
for name in tuple(variables): | |
value = notdone[name] | |
m = _findvar1_rx.search(value) or _findvar2_rx.search(value) | |
if m is not None: | |
n = m.group(1) | |
found = True | |
if n in done: | |
item = str(done[n]) | |
elif n in notdone: | |
# get it on a subsequent round | |
found = False | |
elif n in os.environ: | |
# do it like make: fall back to environment | |
item = os.environ[n] | |
elif n in renamed_variables: | |
if (name.startswith('PY_') and | |
name[3:] in renamed_variables): | |
item = "" | |
elif 'PY_' + n in notdone: | |
found = False | |
else: | |
item = str(done['PY_' + n]) | |
else: | |
done[n] = item = "" | |
if found: | |
after = value[m.end():] | |
value = value[:m.start()] + item + after | |
if "$" in after: | |
notdone[name] = value | |
else: | |
try: | |
value = int(value) | |
except ValueError: | |
done[name] = value.strip() | |
else: | |
done[name] = value | |
variables.remove(name) | |
if (name.startswith('PY_') and | |
name[3:] in renamed_variables): | |
name = name[3:] | |
if name not in done: | |
done[name] = value | |
else: | |
# bogus variable reference (e.g. "prefix=$/opt/python"); | |
# just drop it since we can't deal | |
done[name] = value | |
variables.remove(name) | |
# strip spurious spaces | |
for k, v in done.items(): | |
if isinstance(v, str): | |
done[k] = v.strip() | |
# save the results in the global dictionary | |
vars.update(done) | |
return vars | |
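# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# The loop above resolves $(VAR)/${VAR} references between entries (falling
# back to os.environ) and coerces purely numeric values to int. A small
# round trip over a temporary file:
def _example_parse_makefile():
    import tempfile
    with tempfile.NamedTemporaryFile('w', suffix='.mk', delete=False) as f:
        f.write('CC=gcc\nOPT=-O2\nCFLAGS=$(OPT) -Wall\nVERSION=3\n')
    parsed = _parse_makefile(f.name)
    # parsed == {'CC': 'gcc', 'OPT': '-O2', 'CFLAGS': '-O2 -Wall', 'VERSION': 3}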
def get_makefile_filename(): | |
"""Return the path of the Makefile.""" | |
if _PYTHON_BUILD: | |
return os.path.join(_PROJECT_BASE, "Makefile") | |
if hasattr(sys, 'abiflags'): | |
config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags) | |
else: | |
config_dir_name = 'config' | |
return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile') | |
def _init_posix(vars): | |
"""Initialize the module as appropriate for POSIX systems.""" | |
# load the installed Makefile: | |
makefile = get_makefile_filename() | |
try: | |
_parse_makefile(makefile, vars) | |
except IOError as e: | |
msg = "invalid Python installation: unable to open %s" % makefile | |
if hasattr(e, "strerror"): | |
msg = msg + " (%s)" % e.strerror | |
raise IOError(msg) | |
# load the installed pyconfig.h: | |
config_h = get_config_h_filename() | |
try: | |
with open(config_h) as f: | |
parse_config_h(f, vars) | |
except IOError as e: | |
msg = "invalid Python installation: unable to open %s" % config_h | |
if hasattr(e, "strerror"): | |
msg = msg + " (%s)" % e.strerror | |
raise IOError(msg) | |
# On AIX, there are wrong paths to the linker scripts in the Makefile | |
# -- these paths are relative to the Python source, but when installed | |
# the scripts are in another directory. | |
if _PYTHON_BUILD: | |
vars['LDSHARED'] = vars['BLDSHARED'] | |
def _init_non_posix(vars): | |
"""Initialize the module as appropriate for NT""" | |
# set basic install directories | |
vars['LIBDEST'] = get_path('stdlib') | |
vars['BINLIBDEST'] = get_path('platstdlib') | |
vars['INCLUDEPY'] = get_path('include') | |
vars['SO'] = '.pyd' | |
vars['EXE'] = '.exe' | |
vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT | |
vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable)) | |
# | |
# public APIs | |
# | |
def parse_config_h(fp, vars=None): | |
"""Parse a config.h-style file. | |
A dictionary containing name/value pairs is returned. If an | |
optional dictionary is passed in as the second argument, it is | |
used instead of a new dictionary. | |
""" | |
if vars is None: | |
vars = {} | |
define_rx = re.compile(r"#define ([A-Z][A-Za-z0-9_]+) (.*)\n") | |
undef_rx = re.compile(r"/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n") | |
while True: | |
line = fp.readline() | |
if not line: | |
break | |
m = define_rx.match(line) | |
if m: | |
n, v = m.group(1, 2) | |
try: | |
v = int(v) | |
except ValueError: | |
pass | |
vars[n] = v | |
else: | |
m = undef_rx.match(line) | |
if m: | |
vars[m.group(1)] = 0 | |
return vars | |
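# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# parse_config_h() only looks at "#define NAME value" lines and commented-out
# "/* #undef NAME */" lines, mapping the latter to 0:
def _example_parse_config_h():
    import io
    sample = io.StringIO('#define HAVE_FOO 1\n'
                         '#define VERSION "3.6"\n'
                         '/* #undef HAVE_BAR */\n')
    parsed = parse_config_h(sample)
    # parsed == {'HAVE_FOO': 1, 'VERSION': '"3.6"', 'HAVE_BAR': 0}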
def get_config_h_filename(): | |
"""Return the path of pyconfig.h.""" | |
if _PYTHON_BUILD: | |
if os.name == "nt": | |
inc_dir = os.path.join(_PROJECT_BASE, "PC") | |
else: | |
inc_dir = _PROJECT_BASE | |
else: | |
inc_dir = get_path('platinclude') | |
return os.path.join(inc_dir, 'pyconfig.h') | |
def get_scheme_names(): | |
"""Return a tuple containing the schemes names.""" | |
return tuple(sorted(_SCHEMES.sections())) | |
def get_path_names(): | |
"""Return a tuple containing the paths names.""" | |
# xxx see if we want a static list | |
return _SCHEMES.options('posix_prefix') | |
def get_paths(scheme=_get_default_scheme(), vars=None, expand=True): | |
"""Return a mapping containing an install scheme. | |
``scheme`` is the install scheme name. If not provided, it will | |
return the default scheme for the current platform. | |
""" | |
_ensure_cfg_read() | |
if expand: | |
return _expand_vars(scheme, vars) | |
else: | |
return dict(_SCHEMES.items(scheme)) | |
def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True): | |
"""Return a path corresponding to the scheme. | |
``scheme`` is the install scheme name. | |
""" | |
return get_paths(scheme, vars, expand)[name] | |
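# --- Editor's note: illustrative sketch, not part of the vendored module. ---
# get_paths()/get_path() expand the scheme templates from sysconfig.cfg (the
# [posix_prefix], [nt], ... sections shown earlier) with get_config_vars(), so
# {base}/lib/python{py_version_short}/site-packages becomes a concrete path for
# the running interpreter. The printed values below are examples only:
def _example_get_paths():
    paths = get_paths()                       # default scheme for this platform
    print(paths['purelib'])                   # e.g. /usr/lib/python3.6/site-packages
    print(get_path('scripts', 'posix_user'))  # {userbase}/bin, expanded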
def get_config_vars(*args): | |
"""With no arguments, return a dictionary of all configuration | |
variables relevant for the current platform. | |
On Unix, this means every variable defined in Python's installed Makefile; | |
On Windows and Mac OS it's a much smaller set. | |
With arguments, return a list of values that result from looking up | |
each argument in the configuration variable dictionary. | |
""" | |
global _CONFIG_VARS | |
if _CONFIG_VARS is None: | |
_CONFIG_VARS = {} | |
# Normalized versions of prefix and exec_prefix are handy to have; | |
# in fact, these are the standard versions used most places in the | |
# distutils2 module. | |
_CONFIG_VARS['prefix'] = _PREFIX | |
_CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX | |
_CONFIG_VARS['py_version'] = _PY_VERSION | |
_CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT | |
_CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2] | |
_CONFIG_VARS['base'] = _PREFIX | |
_CONFIG_VARS['platbase'] = _EXEC_PREFIX | |
_CONFIG_VARS['projectbase'] = _PROJECT_BASE | |
try: | |
_CONFIG_VARS['abiflags'] = sys.abiflags | |
except AttributeError: | |
# sys.abiflags may not be defined on all platforms. | |
_CONFIG_VARS['abiflags'] = '' | |
if os.name in ('nt', 'os2'): | |
_init_non_posix(_CONFIG_VARS) | |
if os.name == 'posix': | |
_init_posix(_CONFIG_VARS) | |
# Setting 'userbase' is done below the call to the | |
# init function to enable using 'get_config_var' in | |
# the init-function. | |
if sys.version >= '2.6': | |
_CONFIG_VARS['userbase'] = _getuserbase() | |
if 'srcdir' not in _CONFIG_VARS: | |
_CONFIG_VARS['srcdir'] = _PROJECT_BASE | |
else: | |
_CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir']) | |
# Convert srcdir into an absolute path if it appears necessary. | |
# Normally it is relative to the build directory. However, during | |
# testing, for example, we might be running a non-installed python | |
# from a different directory. | |
if _PYTHON_BUILD and os.name == "posix": | |
base = _PROJECT_BASE | |
try: | |
cwd = os.getcwd() | |
except OSError: | |
cwd = None | |
if (not os.path.isabs(_CONFIG_VARS['srcdir']) and | |
base != cwd): | |
# srcdir is relative and we are not in the same directory | |
# as the executable. Assume executable is in the build | |
# directory and make srcdir absolute. | |
srcdir = os.path.join(base, _CONFIG_VARS['srcdir']) | |
_CONFIG_VARS['srcdir'] = os.path.normpath(srcdir) | |
if sys.platform == 'darwin': | |
kernel_version = os.uname()[2] # Kernel version (8.4.3) | |
major_version = int(kernel_version.split('.')[0]) | |
if major_version < 8: | |
# On macOS before 10.4, check if -arch and -isysroot | |
# are in CFLAGS or LDFLAGS and remove them if they are. | |
# This is needed when building extensions on a 10.3 system | |
# using a universal build of python. | |
for key in ('LDFLAGS', 'BASECFLAGS', | |
# a number of derived variables. These need to be | |
# patched up as well. | |
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): | |
flags = _CONFIG_VARS[key] | |
                    flags = re.sub(r'-arch\s+\w+\s', ' ', flags)
                    flags = re.sub(r'-isysroot [^ \t]*', ' ', flags)
_CONFIG_VARS[key] = flags | |
else: | |
# Allow the user to override the architecture flags using | |
# an environment variable. | |
# NOTE: This name was introduced by Apple in OSX 10.5 and | |
# is used by several scripting languages distributed with | |
# that OS release. | |
if 'ARCHFLAGS' in os.environ: | |
arch = os.environ['ARCHFLAGS'] | |
for key in ('LDFLAGS', 'BASECFLAGS', | |
# a number of derived variables. These need to be | |
# patched up as well. | |
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): | |
flags = _CONFIG_VARS[key] | |
                        flags = re.sub(r'-arch\s+\w+\s', ' ', flags)
flags = flags + ' ' + arch | |
_CONFIG_VARS[key] = flags | |
# If we're on OSX 10.5 or later and the user tries to | |
# compiles an extension using an SDK that is not present | |
# on the current machine it is better to not use an SDK | |
# than to fail. | |
# | |
# The major usecase for this is users using a Python.org | |
# binary installer on OSX 10.6: that installer uses | |
# the 10.4u SDK, but that SDK is not installed by default | |
# when you install Xcode. | |
# | |
CFLAGS = _CONFIG_VARS.get('CFLAGS', '') | |
                m = re.search(r'-isysroot\s+(\S+)', CFLAGS)
if m is not None: | |
sdk = m.group(1) | |
if not os.path.exists(sdk): | |
for key in ('LDFLAGS', 'BASECFLAGS', | |
# a number of derived variables. These need to be | |
# patched up as well. | |
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): | |
flags = _CONFIG_VARS[key] | |
                            flags = re.sub(r'-isysroot\s+\S+(\s|$)', ' ', flags)
_CONFIG_VARS[key] = flags | |
if args: | |
vals = [] | |
for name in args: | |
vals.append(_CONFIG_VARS.get(name)) | |
return vals | |
else: | |
return _CONFIG_VARS | |
def get_config_var(name): | |
"""Return the value of a single variable using the dictionary returned by | |
'get_config_vars()'. | |
Equivalent to get_config_vars().get(name) | |
""" | |
return get_config_vars().get(name) | |
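# Illustrative usage (added for clarity, not part of the original module):
#
#     get_config_var('prefix')                  # same as get_config_vars().get('prefix')
#     get_config_vars('prefix', 'exec_prefix')  # -> list with the two values, in order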
def get_platform(): | |
"""Return a string that identifies the current platform. | |
This is used mainly to distinguish platform-specific build directories and | |
platform-specific built distributions. Typically includes the OS name | |
and version and the architecture (as supplied by 'os.uname()'), | |
    although the exact information included depends on the OS; e.g. for IRIX
the architecture isn't particularly important (IRIX only runs on SGI | |
hardware), but for Linux the kernel version isn't particularly | |
important. | |
Examples of returned values: | |
linux-i586 | |
linux-alpha (?) | |
solaris-2.6-sun4u | |
irix-5.3 | |
irix64-6.2 | |
Windows will return one of: | |
       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc.))
win-ia64 (64bit Windows on Itanium) | |
win32 (all others - specifically, sys.platform is returned) | |
For other non-POSIX platforms, currently just returns 'sys.platform'. | |
""" | |
if os.name == 'nt': | |
# sniff sys.version for architecture. | |
prefix = " bit (" | |
i = sys.version.find(prefix) | |
if i == -1: | |
return sys.platform | |
j = sys.version.find(")", i) | |
look = sys.version[i+len(prefix):j].lower() | |
if look == 'amd64': | |
return 'win-amd64' | |
if look == 'itanium': | |
return 'win-ia64' | |
return sys.platform | |
if os.name != "posix" or not hasattr(os, 'uname'): | |
# XXX what about the architecture? NT is Intel or Alpha, | |
# Mac OS is M68k or PPC, etc. | |
return sys.platform | |
# Try to distinguish various flavours of Unix | |
osname, host, release, version, machine = os.uname() | |
# Convert the OS name to lowercase, remove '/' characters | |
# (to accommodate BSD/OS), and translate spaces (for "Power Macintosh") | |
osname = osname.lower().replace('/', '') | |
machine = machine.replace(' ', '_') | |
machine = machine.replace('/', '-') | |
if osname[:5] == "linux": | |
# At least on Linux/Intel, 'machine' is the processor -- | |
# i386, etc. | |
# XXX what about Alpha, SPARC, etc? | |
return "%s-%s" % (osname, machine) | |
elif osname[:5] == "sunos": | |
if release[0] >= "5": # SunOS 5 == Solaris 2 | |
osname = "solaris" | |
release = "%d.%s" % (int(release[0]) - 3, release[2:]) | |
# fall through to standard osname-release-machine representation | |
elif osname[:4] == "irix": # could be "irix64"! | |
return "%s-%s" % (osname, release) | |
elif osname[:3] == "aix": | |
return "%s-%s.%s" % (osname, version, release) | |
elif osname[:6] == "cygwin": | |
osname = "cygwin" | |
rel_re = re.compile(r'[\d.]+') | |
m = rel_re.match(release) | |
if m: | |
release = m.group() | |
elif osname[:6] == "darwin": | |
# | |
# For our purposes, we'll assume that the system version from | |
# distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set | |
# to. This makes the compatibility story a bit more sane because the | |
# machine is going to compile and link as if it were | |
# MACOSX_DEPLOYMENT_TARGET. | |
cfgvars = get_config_vars() | |
macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET') | |
if True: | |
# Always calculate the release of the running machine, | |
# needed to determine if we can build fat binaries or not. | |
macrelease = macver | |
# Get the system version. Reading this plist is a documented | |
# way to get the system version (see the documentation for | |
# the Gestalt Manager) | |
try: | |
f = open('/System/Library/CoreServices/SystemVersion.plist') | |
except IOError: | |
# We're on a plain darwin box, fall back to the default | |
# behaviour. | |
pass | |
else: | |
try: | |
m = re.search(r'<key>ProductUserVisibleVersion</key>\s*' | |
r'<string>(.*?)</string>', f.read()) | |
finally: | |
f.close() | |
if m is not None: | |
macrelease = '.'.join(m.group(1).split('.')[:2]) | |
# else: fall back to the default behaviour | |
if not macver: | |
macver = macrelease | |
if macver: | |
release = macver | |
osname = "macosx" | |
if ((macrelease + '.') >= '10.4.' and | |
'-arch' in get_config_vars().get('CFLAGS', '').strip()): | |
# The universal build will build fat binaries, but not on | |
# systems before 10.4 | |
# | |
# Try to detect 4-way universal builds, those have machine-type | |
# 'universal' instead of 'fat'. | |
machine = 'fat' | |
cflags = get_config_vars().get('CFLAGS') | |
                archs = re.findall(r'-arch\s+(\S+)', cflags)
archs = tuple(sorted(set(archs))) | |
if len(archs) == 1: | |
machine = archs[0] | |
elif archs == ('i386', 'ppc'): | |
machine = 'fat' | |
elif archs == ('i386', 'x86_64'): | |
machine = 'intel' | |
elif archs == ('i386', 'ppc', 'x86_64'): | |
machine = 'fat3' | |
elif archs == ('ppc64', 'x86_64'): | |
machine = 'fat64' | |
elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'): | |
machine = 'universal' | |
else: | |
raise ValueError( | |
"Don't know machine value for archs=%r" % (archs,)) | |
elif machine == 'i386': | |
# On OSX the machine type returned by uname is always the | |
# 32-bit variant, even if the executable architecture is | |
# the 64-bit variant | |
if sys.maxsize >= 2**32: | |
machine = 'x86_64' | |
elif machine in ('PowerPC', 'Power_Macintosh'): | |
# Pick a sane name for the PPC architecture. | |
# See 'i386' case | |
if sys.maxsize >= 2**32: | |
machine = 'ppc64' | |
else: | |
machine = 'ppc' | |
return "%s-%s-%s" % (osname, release, machine) | |
def get_python_version(): | |
return _PY_VERSION_SHORT | |
def _print_dict(title, data): | |
for index, (key, value) in enumerate(sorted(data.items())): | |
if index == 0: | |
print('%s: ' % (title)) | |
print('\t%s = "%s"' % (key, value)) | |
def _main(): | |
"""Display all information sysconfig detains.""" | |
print('Platform: "%s"' % get_platform()) | |
print('Python version: "%s"' % get_python_version()) | |
print('Current installation scheme: "%s"' % _get_default_scheme()) | |
print() | |
_print_dict('Paths', get_paths()) | |
print() | |
_print_dict('Variables', get_config_vars()) | |
if __name__ == '__main__': | |
_main() |
#------------------------------------------------------------------- | |
# tarfile.py | |
#------------------------------------------------------------------- | |
# Copyright (C) 2002 Lars Gustaebel <[email protected]> | |
# All rights reserved. | |
# | |
# Permission is hereby granted, free of charge, to any person | |
# obtaining a copy of this software and associated documentation | |
# files (the "Software"), to deal in the Software without | |
# restriction, including without limitation the rights to use, | |
# copy, modify, merge, publish, distribute, sublicense, and/or sell | |
# copies of the Software, and to permit persons to whom the | |
# Software is furnished to do so, subject to the following | |
# conditions: | |
# | |
# The above copyright notice and this permission notice shall be | |
# included in all copies or substantial portions of the Software. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
# OTHER DEALINGS IN THE SOFTWARE. | |
# | |
from __future__ import print_function | |
"""Read from and write to tar format archives. | |
""" | |
__version__ = "$Revision$" | |
version = "0.9.0" | |
__author__ = "Lars Gust\u00e4bel ([email protected])" | |
__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $" | |
__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $" | |
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." | |
#--------- | |
# Imports | |
#--------- | |
import sys | |
import os | |
import stat | |
import errno | |
import time | |
import struct | |
import copy | |
import re | |
try: | |
import grp, pwd | |
except ImportError: | |
grp = pwd = None | |
# os.symlink on Windows prior to 6.0 raises NotImplementedError | |
symlink_exception = (AttributeError, NotImplementedError) | |
try: | |
# WindowsError (1314) will be raised if the caller does not hold the | |
# SeCreateSymbolicLinkPrivilege privilege | |
symlink_exception += (WindowsError,) | |
except NameError: | |
pass | |
# from tarfile import * | |
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] | |
if sys.version_info[0] < 3: | |
import __builtin__ as builtins | |
else: | |
import builtins | |
_open = builtins.open # Since 'open' is TarFile.open | |
#--------------------------------------------------------- | |
# tar constants | |
#--------------------------------------------------------- | |
NUL = b"\0" # the null character | |
BLOCKSIZE = 512 # length of processing blocks | |
RECORDSIZE = BLOCKSIZE * 20 # length of records | |
GNU_MAGIC = b"ustar \0" # magic gnu tar string | |
POSIX_MAGIC = b"ustar\x0000" # magic posix tar string | |
LENGTH_NAME = 100 # maximum length of a filename | |
LENGTH_LINK = 100 # maximum length of a linkname | |
LENGTH_PREFIX = 155 # maximum length of the prefix field | |
REGTYPE = b"0" # regular file | |
AREGTYPE = b"\0" # regular file | |
LNKTYPE = b"1" # link (inside tarfile) | |
SYMTYPE = b"2" # symbolic link | |
CHRTYPE = b"3" # character special device | |
BLKTYPE = b"4" # block special device | |
DIRTYPE = b"5" # directory | |
FIFOTYPE = b"6" # fifo special device | |
CONTTYPE = b"7" # contiguous file | |
GNUTYPE_LONGNAME = b"L" # GNU tar longname | |
GNUTYPE_LONGLINK = b"K" # GNU tar longlink | |
GNUTYPE_SPARSE = b"S" # GNU tar sparse file | |
XHDTYPE = b"x" # POSIX.1-2001 extended header | |
XGLTYPE = b"g" # POSIX.1-2001 global header | |
SOLARIS_XHDTYPE = b"X" # Solaris extended header | |
USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format | |
GNU_FORMAT = 1 # GNU tar format | |
PAX_FORMAT = 2 # POSIX.1-2001 (pax) format | |
DEFAULT_FORMAT = GNU_FORMAT | |
#--------------------------------------------------------- | |
# tarfile constants | |
#--------------------------------------------------------- | |
# File types that tarfile supports: | |
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, | |
SYMTYPE, DIRTYPE, FIFOTYPE, | |
CONTTYPE, CHRTYPE, BLKTYPE, | |
GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, | |
GNUTYPE_SPARSE) | |
# File types that will be treated as a regular file. | |
REGULAR_TYPES = (REGTYPE, AREGTYPE, | |
CONTTYPE, GNUTYPE_SPARSE) | |
# File types that are part of the GNU tar format. | |
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, | |
GNUTYPE_SPARSE) | |
# Fields from a pax header that override a TarInfo attribute. | |
PAX_FIELDS = ("path", "linkpath", "size", "mtime", | |
"uid", "gid", "uname", "gname") | |
# Fields from a pax header that are affected by hdrcharset. | |
PAX_NAME_FIELDS = set(("path", "linkpath", "uname", "gname")) | |
# Fields in a pax header that are numbers, all other fields | |
# are treated as strings. | |
PAX_NUMBER_FIELDS = { | |
"atime": float, | |
"ctime": float, | |
"mtime": float, | |
"uid": int, | |
"gid": int, | |
"size": int | |
} | |
#--------------------------------------------------------- | |
# Bits used in the mode field, values in octal. | |
#--------------------------------------------------------- | |
S_IFLNK = 0o120000 # symbolic link | |
S_IFREG = 0o100000 # regular file | |
S_IFBLK = 0o060000 # block device | |
S_IFDIR = 0o040000 # directory | |
S_IFCHR = 0o020000 # character device | |
S_IFIFO = 0o010000 # fifo | |
TSUID = 0o4000 # set UID on execution | |
TSGID = 0o2000 # set GID on execution | |
TSVTX = 0o1000 # reserved | |
TUREAD = 0o400 # read by owner | |
TUWRITE = 0o200 # write by owner | |
TUEXEC = 0o100 # execute/search by owner | |
TGREAD = 0o040 # read by group | |
TGWRITE = 0o020 # write by group | |
TGEXEC = 0o010 # execute/search by group | |
TOREAD = 0o004 # read by other | |
TOWRITE = 0o002 # write by other | |
TOEXEC = 0o001 # execute/search by other | |
#--------------------------------------------------------- | |
# initialization | |
#--------------------------------------------------------- | |
if os.name in ("nt", "ce"): | |
ENCODING = "utf-8" | |
else: | |
ENCODING = sys.getfilesystemencoding() | |
#--------------------------------------------------------- | |
# Some useful functions | |
#--------------------------------------------------------- | |
def stn(s, length, encoding, errors): | |
"""Convert a string to a null-terminated bytes object. | |
""" | |
s = s.encode(encoding, errors) | |
return s[:length] + (length - len(s)) * NUL | |
def nts(s, encoding, errors): | |
"""Convert a null-terminated bytes object to a string. | |
""" | |
p = s.find(b"\0") | |
if p != -1: | |
s = s[:p] | |
return s.decode(encoding, errors) | |
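# Worked example (added for clarity, not in the original source):
#   stn("foo", 8, "ascii", "strict")       -> b"foo\x00\x00\x00\x00\x00"
#   nts(b"foo\x00\x00", "ascii", "strict") -> "foo"
# i.e. stn() pads (or truncates) to the field length and nts() stops at the
# first NUL byte.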
def nti(s): | |
"""Convert a number field to a python number. | |
""" | |
# There are two possible encodings for a number field, see | |
# itn() below. | |
if s[0] != chr(0o200): | |
try: | |
n = int(nts(s, "ascii", "strict") or "0", 8) | |
except ValueError: | |
raise InvalidHeaderError("invalid header") | |
else: | |
n = 0 | |
for i in range(len(s) - 1): | |
n <<= 8 | |
n += ord(s[i + 1]) | |
return n | |
def itn(n, digits=8, format=DEFAULT_FORMAT): | |
"""Convert a python number to a number field. | |
""" | |
# POSIX 1003.1-1988 requires numbers to be encoded as a string of | |
# octal digits followed by a null-byte, this allows values up to | |
# (8**(digits-1))-1. GNU tar allows storing numbers greater than | |
# that if necessary. A leading 0o200 byte indicates this particular | |
# encoding, the following digits-1 bytes are a big-endian | |
# representation. This allows values up to (256**(digits-1))-1. | |
if 0 <= n < 8 ** (digits - 1): | |
s = ("%0*o" % (digits - 1, n)).encode("ascii") + NUL | |
else: | |
if format != GNU_FORMAT or n >= 256 ** (digits - 1): | |
raise ValueError("overflow in number field") | |
if n < 0: | |
# XXX We mimic GNU tar's behaviour with negative numbers, | |
# this could raise OverflowError. | |
n = struct.unpack("L", struct.pack("l", n))[0] | |
s = bytearray() | |
for i in range(digits - 1): | |
s.insert(0, n & 0o377) | |
n >>= 8 | |
s.insert(0, 0o200) | |
return s | |
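# Worked example (added for clarity, not in the original source): with the
# default 8-digit field, small values round-trip through the octal text
# encoding, e.g.
#   itn(1000) -> b"0001750\x00"    and    nti(b"0001750\x00") -> 1000
# while values outside the range 0 <= n < 8**7 fall back to the GNU base-256
# encoding that starts with a 0o200 byte (only allowed for GNU_FORMAT).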
def calc_chksums(buf): | |
"""Calculate the checksum for a member's header by summing up all | |
characters except for the chksum field which is treated as if | |
it was filled with spaces. According to the GNU tar sources, | |
some tars (Sun and NeXT) calculate chksum with signed char, | |
which will be different if there are chars in the buffer with | |
the high bit set. So we calculate two checksums, unsigned and | |
signed. | |
""" | |
unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512])) | |
signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512])) | |
return unsigned_chksum, signed_chksum | |
def copyfileobj(src, dst, length=None): | |
"""Copy length bytes from fileobj src to fileobj dst. | |
If length is None, copy the entire content. | |
""" | |
if length == 0: | |
return | |
if length is None: | |
while True: | |
buf = src.read(16*1024) | |
if not buf: | |
break | |
dst.write(buf) | |
return | |
BUFSIZE = 16 * 1024 | |
blocks, remainder = divmod(length, BUFSIZE) | |
for b in range(blocks): | |
buf = src.read(BUFSIZE) | |
if len(buf) < BUFSIZE: | |
raise IOError("end of file reached") | |
dst.write(buf) | |
if remainder != 0: | |
buf = src.read(remainder) | |
if len(buf) < remainder: | |
raise IOError("end of file reached") | |
dst.write(buf) | |
return | |
filemode_table = ( | |
((S_IFLNK, "l"), | |
(S_IFREG, "-"), | |
(S_IFBLK, "b"), | |
(S_IFDIR, "d"), | |
(S_IFCHR, "c"), | |
(S_IFIFO, "p")), | |
((TUREAD, "r"),), | |
((TUWRITE, "w"),), | |
((TUEXEC|TSUID, "s"), | |
(TSUID, "S"), | |
(TUEXEC, "x")), | |
((TGREAD, "r"),), | |
((TGWRITE, "w"),), | |
((TGEXEC|TSGID, "s"), | |
(TSGID, "S"), | |
(TGEXEC, "x")), | |
((TOREAD, "r"),), | |
((TOWRITE, "w"),), | |
((TOEXEC|TSVTX, "t"), | |
(TSVTX, "T"), | |
(TOEXEC, "x")) | |
) | |
def filemode(mode): | |
"""Convert a file's mode to a string of the form | |
-rwxrwxrwx. | |
Used by TarFile.list() | |
""" | |
perm = [] | |
for table in filemode_table: | |
for bit, char in table: | |
if mode & bit == bit: | |
perm.append(char) | |
break | |
else: | |
perm.append("-") | |
return "".join(perm) | |
class TarError(Exception): | |
"""Base exception.""" | |
pass | |
class ExtractError(TarError): | |
"""General exception for extract errors.""" | |
pass | |
class ReadError(TarError): | |
"""Exception for unreadable tar archives.""" | |
pass | |
class CompressionError(TarError): | |
"""Exception for unavailable compression methods.""" | |
pass | |
class StreamError(TarError): | |
"""Exception for unsupported operations on stream-like TarFiles.""" | |
pass | |
class HeaderError(TarError): | |
"""Base exception for header errors.""" | |
pass | |
class EmptyHeaderError(HeaderError): | |
"""Exception for empty headers.""" | |
pass | |
class TruncatedHeaderError(HeaderError): | |
"""Exception for truncated headers.""" | |
pass | |
class EOFHeaderError(HeaderError): | |
"""Exception for end of file headers.""" | |
pass | |
class InvalidHeaderError(HeaderError): | |
"""Exception for invalid headers.""" | |
pass | |
class SubsequentHeaderError(HeaderError): | |
"""Exception for missing and invalid extended headers.""" | |
pass | |
#--------------------------- | |
# internal stream interface | |
#--------------------------- | |
class _LowLevelFile(object): | |
"""Low-level file object. Supports reading and writing. | |
It is used instead of a regular file object for streaming | |
access. | |
""" | |
def __init__(self, name, mode): | |
mode = { | |
"r": os.O_RDONLY, | |
"w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, | |
}[mode] | |
if hasattr(os, "O_BINARY"): | |
mode |= os.O_BINARY | |
self.fd = os.open(name, mode, 0o666) | |
def close(self): | |
os.close(self.fd) | |
def read(self, size): | |
return os.read(self.fd, size) | |
def write(self, s): | |
os.write(self.fd, s) | |
class _Stream(object): | |
"""Class that serves as an adapter between TarFile and | |
a stream-like object. The stream-like object only | |
needs to have a read() or write() method and is accessed | |
blockwise. Use of gzip or bzip2 compression is possible. | |
A stream-like object could be for example: sys.stdin, | |
sys.stdout, a socket, a tape device etc. | |
_Stream is intended to be used only internally. | |
""" | |
def __init__(self, name, mode, comptype, fileobj, bufsize): | |
"""Construct a _Stream object. | |
""" | |
self._extfileobj = True | |
if fileobj is None: | |
fileobj = _LowLevelFile(name, mode) | |
self._extfileobj = False | |
if comptype == '*': | |
# Enable transparent compression detection for the | |
# stream interface | |
fileobj = _StreamProxy(fileobj) | |
comptype = fileobj.getcomptype() | |
self.name = name or "" | |
self.mode = mode | |
self.comptype = comptype | |
self.fileobj = fileobj | |
self.bufsize = bufsize | |
self.buf = b"" | |
self.pos = 0 | |
self.closed = False | |
try: | |
if comptype == "gz": | |
try: | |
import zlib | |
except ImportError: | |
raise CompressionError("zlib module is not available") | |
self.zlib = zlib | |
self.crc = zlib.crc32(b"") | |
if mode == "r": | |
self._init_read_gz() | |
else: | |
self._init_write_gz() | |
if comptype == "bz2": | |
try: | |
import bz2 | |
except ImportError: | |
raise CompressionError("bz2 module is not available") | |
if mode == "r": | |
self.dbuf = b"" | |
self.cmp = bz2.BZ2Decompressor() | |
else: | |
self.cmp = bz2.BZ2Compressor() | |
except: | |
if not self._extfileobj: | |
self.fileobj.close() | |
self.closed = True | |
raise | |
def __del__(self): | |
if hasattr(self, "closed") and not self.closed: | |
self.close() | |
def _init_write_gz(self): | |
"""Initialize for writing with gzip compression. | |
""" | |
self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, | |
-self.zlib.MAX_WBITS, | |
self.zlib.DEF_MEM_LEVEL, | |
0) | |
timestamp = struct.pack("<L", int(time.time())) | |
self.__write(b"\037\213\010\010" + timestamp + b"\002\377") | |
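        # (Clarifying note, not in the original source: the bytes above form a
        # minimal RFC 1952 gzip header -- \037\213 is the gzip magic number,
        # the first \010 selects the "deflate" method, the second \010 sets the
        # FNAME flag, then come the 4-byte mtime, XFL=2 and OS=\377 (unknown).)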
if self.name.endswith(".gz"): | |
self.name = self.name[:-3] | |
# RFC1952 says we must use ISO-8859-1 for the FNAME field. | |
self.__write(self.name.encode("iso-8859-1", "replace") + NUL) | |
def write(self, s): | |
"""Write string s to the stream. | |
""" | |
if self.comptype == "gz": | |
self.crc = self.zlib.crc32(s, self.crc) | |
self.pos += len(s) | |
if self.comptype != "tar": | |
s = self.cmp.compress(s) | |
self.__write(s) | |
def __write(self, s): | |
"""Write string s to the stream if a whole new block | |
is ready to be written. | |
""" | |
self.buf += s | |
while len(self.buf) > self.bufsize: | |
self.fileobj.write(self.buf[:self.bufsize]) | |
self.buf = self.buf[self.bufsize:] | |
def close(self): | |
"""Close the _Stream object. No operation should be | |
done on it afterwards. | |
""" | |
if self.closed: | |
return | |
if self.mode == "w" and self.comptype != "tar": | |
self.buf += self.cmp.flush() | |
if self.mode == "w" and self.buf: | |
self.fileobj.write(self.buf) | |
self.buf = b"" | |
if self.comptype == "gz": | |
# The native zlib crc is an unsigned 32-bit integer, but | |
# the Python wrapper implicitly casts that to a signed C | |
# long. So, on a 32-bit box self.crc may "look negative", | |
# while the same crc on a 64-bit box may "look positive". | |
# To avoid irksome warnings from the `struct` module, force | |
# it to look positive on all boxes. | |
self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff)) | |
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF)) | |
if not self._extfileobj: | |
self.fileobj.close() | |
self.closed = True | |
def _init_read_gz(self): | |
"""Initialize for reading a gzip compressed fileobj. | |
""" | |
self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS) | |
self.dbuf = b"" | |
# taken from gzip.GzipFile with some alterations | |
if self.__read(2) != b"\037\213": | |
raise ReadError("not a gzip file") | |
if self.__read(1) != b"\010": | |
raise CompressionError("unsupported compression method") | |
flag = ord(self.__read(1)) | |
self.__read(6) | |
if flag & 4: | |
xlen = ord(self.__read(1)) + 256 * ord(self.__read(1)) | |
self.read(xlen) | |
if flag & 8: | |
while True: | |
s = self.__read(1) | |
if not s or s == NUL: | |
break | |
if flag & 16: | |
while True: | |
s = self.__read(1) | |
if not s or s == NUL: | |
break | |
if flag & 2: | |
self.__read(2) | |
def tell(self): | |
"""Return the stream's file pointer position. | |
""" | |
return self.pos | |
def seek(self, pos=0): | |
"""Set the stream's file pointer to pos. Negative seeking | |
is forbidden. | |
""" | |
if pos - self.pos >= 0: | |
blocks, remainder = divmod(pos - self.pos, self.bufsize) | |
for i in range(blocks): | |
self.read(self.bufsize) | |
self.read(remainder) | |
else: | |
raise StreamError("seeking backwards is not allowed") | |
return self.pos | |
def read(self, size=None): | |
"""Return the next size number of bytes from the stream. | |
If size is not defined, return all bytes of the stream | |
up to EOF. | |
""" | |
if size is None: | |
t = [] | |
while True: | |
buf = self._read(self.bufsize) | |
if not buf: | |
break | |
t.append(buf) | |
buf = "".join(t) | |
else: | |
buf = self._read(size) | |
self.pos += len(buf) | |
return buf | |
def _read(self, size): | |
"""Return size bytes from the stream. | |
""" | |
if self.comptype == "tar": | |
return self.__read(size) | |
c = len(self.dbuf) | |
while c < size: | |
buf = self.__read(self.bufsize) | |
if not buf: | |
break | |
try: | |
buf = self.cmp.decompress(buf) | |
except IOError: | |
raise ReadError("invalid compressed data") | |
self.dbuf += buf | |
c += len(buf) | |
buf = self.dbuf[:size] | |
self.dbuf = self.dbuf[size:] | |
return buf | |
def __read(self, size): | |
"""Return size bytes from stream. If internal buffer is empty, | |
read another block from the stream. | |
""" | |
c = len(self.buf) | |
while c < size: | |
buf = self.fileobj.read(self.bufsize) | |
if not buf: | |
break | |
self.buf += buf | |
c += len(buf) | |
buf = self.buf[:size] | |
self.buf = self.buf[size:] | |
return buf | |
# class _Stream | |
class _StreamProxy(object): | |
"""Small proxy class that enables transparent compression | |
detection for the Stream interface (mode 'r|*'). | |
""" | |
def __init__(self, fileobj): | |
self.fileobj = fileobj | |
self.buf = self.fileobj.read(BLOCKSIZE) | |
def read(self, size): | |
self.read = self.fileobj.read | |
return self.buf | |
def getcomptype(self): | |
if self.buf.startswith(b"\037\213\010"): | |
return "gz" | |
if self.buf.startswith(b"BZh91"): | |
return "bz2" | |
return "tar" | |
def close(self): | |
self.fileobj.close() | |
# class StreamProxy | |
class _BZ2Proxy(object): | |
"""Small proxy class that enables external file object | |
support for "r:bz2" and "w:bz2" modes. This is actually | |
a workaround for a limitation in bz2 module's BZ2File | |
class which (unlike gzip.GzipFile) has no support for | |
a file object argument. | |
""" | |
blocksize = 16 * 1024 | |
def __init__(self, fileobj, mode): | |
self.fileobj = fileobj | |
self.mode = mode | |
self.name = getattr(self.fileobj, "name", None) | |
self.init() | |
def init(self): | |
import bz2 | |
self.pos = 0 | |
if self.mode == "r": | |
self.bz2obj = bz2.BZ2Decompressor() | |
self.fileobj.seek(0) | |
self.buf = b"" | |
else: | |
self.bz2obj = bz2.BZ2Compressor() | |
def read(self, size): | |
x = len(self.buf) | |
while x < size: | |
raw = self.fileobj.read(self.blocksize) | |
if not raw: | |
break | |
data = self.bz2obj.decompress(raw) | |
self.buf += data | |
x += len(data) | |
buf = self.buf[:size] | |
self.buf = self.buf[size:] | |
self.pos += len(buf) | |
return buf | |
def seek(self, pos): | |
if pos < self.pos: | |
self.init() | |
self.read(pos - self.pos) | |
def tell(self): | |
return self.pos | |
def write(self, data): | |
self.pos += len(data) | |
raw = self.bz2obj.compress(data) | |
self.fileobj.write(raw) | |
def close(self): | |
if self.mode == "w": | |
raw = self.bz2obj.flush() | |
self.fileobj.write(raw) | |
# class _BZ2Proxy | |
#------------------------ | |
# Extraction file object | |
#------------------------ | |
class _FileInFile(object): | |
"""A thin wrapper around an existing file object that | |
provides a part of its data as an individual file | |
object. | |
""" | |
def __init__(self, fileobj, offset, size, blockinfo=None): | |
self.fileobj = fileobj | |
self.offset = offset | |
self.size = size | |
self.position = 0 | |
if blockinfo is None: | |
blockinfo = [(0, size)] | |
# Construct a map with data and zero blocks. | |
self.map_index = 0 | |
self.map = [] | |
lastpos = 0 | |
realpos = self.offset | |
for offset, size in blockinfo: | |
if offset > lastpos: | |
self.map.append((False, lastpos, offset, None)) | |
self.map.append((True, offset, offset + size, realpos)) | |
realpos += size | |
lastpos = offset + size | |
if lastpos < self.size: | |
self.map.append((False, lastpos, self.size, None)) | |
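        # Illustrative example (added for clarity, not in the original source):
        # for a sparse member with blockinfo=[(0, 10), (20, 5)] and size=30,
        # the map built above is
        #   [(True, 0, 10, offset), (False, 10, 20, None),
        #    (True, 20, 25, offset + 10), (False, 25, 30, None)]
        # i.e. (is_data, start, stop, position_in_the_underlying_file).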
def seekable(self): | |
if not hasattr(self.fileobj, "seekable"): | |
# XXX gzip.GzipFile and bz2.BZ2File | |
return True | |
return self.fileobj.seekable() | |
def tell(self): | |
"""Return the current file position. | |
""" | |
return self.position | |
def seek(self, position): | |
"""Seek to a position in the file. | |
""" | |
self.position = position | |
def read(self, size=None): | |
"""Read data from the file. | |
""" | |
if size is None: | |
size = self.size - self.position | |
else: | |
size = min(size, self.size - self.position) | |
buf = b"" | |
while size > 0: | |
while True: | |
data, start, stop, offset = self.map[self.map_index] | |
if start <= self.position < stop: | |
break | |
else: | |
self.map_index += 1 | |
if self.map_index == len(self.map): | |
self.map_index = 0 | |
length = min(size, stop - self.position) | |
if data: | |
self.fileobj.seek(offset + (self.position - start)) | |
buf += self.fileobj.read(length) | |
else: | |
buf += NUL * length | |
size -= length | |
self.position += length | |
return buf | |
#class _FileInFile | |
class ExFileObject(object): | |
"""File-like object for reading an archive member. | |
Is returned by TarFile.extractfile(). | |
""" | |
blocksize = 1024 | |
def __init__(self, tarfile, tarinfo): | |
self.fileobj = _FileInFile(tarfile.fileobj, | |
tarinfo.offset_data, | |
tarinfo.size, | |
tarinfo.sparse) | |
self.name = tarinfo.name | |
self.mode = "r" | |
self.closed = False | |
self.size = tarinfo.size | |
self.position = 0 | |
self.buffer = b"" | |
def readable(self): | |
return True | |
def writable(self): | |
return False | |
def seekable(self): | |
return self.fileobj.seekable() | |
def read(self, size=None): | |
"""Read at most size bytes from the file. If size is not | |
present or None, read all data until EOF is reached. | |
""" | |
if self.closed: | |
raise ValueError("I/O operation on closed file") | |
buf = b"" | |
if self.buffer: | |
if size is None: | |
buf = self.buffer | |
self.buffer = b"" | |
else: | |
buf = self.buffer[:size] | |
self.buffer = self.buffer[size:] | |
if size is None: | |
buf += self.fileobj.read() | |
else: | |
buf += self.fileobj.read(size - len(buf)) | |
self.position += len(buf) | |
return buf | |
# XXX TextIOWrapper uses the read1() method. | |
read1 = read | |
def readline(self, size=-1): | |
"""Read one entire line from the file. If size is present | |
and non-negative, return a string with at most that | |
size, which may be an incomplete line. | |
""" | |
if self.closed: | |
raise ValueError("I/O operation on closed file") | |
pos = self.buffer.find(b"\n") + 1 | |
if pos == 0: | |
# no newline found. | |
while True: | |
buf = self.fileobj.read(self.blocksize) | |
self.buffer += buf | |
if not buf or b"\n" in buf: | |
pos = self.buffer.find(b"\n") + 1 | |
if pos == 0: | |
# no newline found. | |
pos = len(self.buffer) | |
break | |
if size != -1: | |
pos = min(size, pos) | |
buf = self.buffer[:pos] | |
self.buffer = self.buffer[pos:] | |
self.position += len(buf) | |
return buf | |
def readlines(self): | |
"""Return a list with all remaining lines. | |
""" | |
result = [] | |
while True: | |
line = self.readline() | |
if not line: break | |
result.append(line) | |
return result | |
def tell(self): | |
"""Return the current file position. | |
""" | |
if self.closed: | |
raise ValueError("I/O operation on closed file") | |
return self.position | |
def seek(self, pos, whence=os.SEEK_SET): | |
"""Seek to a position in the file. | |
""" | |
if self.closed: | |
raise ValueError("I/O operation on closed file") | |
if whence == os.SEEK_SET: | |
self.position = min(max(pos, 0), self.size) | |
elif whence == os.SEEK_CUR: | |
if pos < 0: | |
self.position = max(self.position + pos, 0) | |
else: | |
self.position = min(self.position + pos, self.size) | |
elif whence == os.SEEK_END: | |
self.position = max(min(self.size + pos, self.size), 0) | |
else: | |
raise ValueError("Invalid argument") | |
self.buffer = b"" | |
self.fileobj.seek(self.position) | |
def close(self): | |
"""Close the file object. | |
""" | |
self.closed = True | |
def __iter__(self): | |
"""Get an iterator over the file's lines. | |
""" | |
while True: | |
line = self.readline() | |
if not line: | |
break | |
yield line | |
#class ExFileObject | |
#------------------ | |
# Exported Classes | |
#------------------ | |
class TarInfo(object): | |
"""Informational class which holds the details about an | |
archive member given by a tar header block. | |
TarInfo objects are returned by TarFile.getmember(), | |
TarFile.getmembers() and TarFile.gettarinfo() and are | |
usually created internally. | |
""" | |
__slots__ = ("name", "mode", "uid", "gid", "size", "mtime", | |
"chksum", "type", "linkname", "uname", "gname", | |
"devmajor", "devminor", | |
"offset", "offset_data", "pax_headers", "sparse", | |
"tarfile", "_sparse_structs", "_link_target") | |
def __init__(self, name=""): | |
"""Construct a TarInfo object. name is the optional name | |
of the member. | |
""" | |
self.name = name # member name | |
self.mode = 0o644 # file permissions | |
self.uid = 0 # user id | |
self.gid = 0 # group id | |
self.size = 0 # file size | |
self.mtime = 0 # modification time | |
self.chksum = 0 # header checksum | |
self.type = REGTYPE # member type | |
self.linkname = "" # link name | |
self.uname = "" # user name | |
self.gname = "" # group name | |
self.devmajor = 0 # device major number | |
self.devminor = 0 # device minor number | |
self.offset = 0 # the tar header starts here | |
self.offset_data = 0 # the file's data starts here | |
self.sparse = None # sparse member information | |
self.pax_headers = {} # pax header information | |
# In pax headers the "name" and "linkname" field are called | |
# "path" and "linkpath". | |
def _getpath(self): | |
return self.name | |
def _setpath(self, name): | |
self.name = name | |
path = property(_getpath, _setpath) | |
def _getlinkpath(self): | |
return self.linkname | |
def _setlinkpath(self, linkname): | |
self.linkname = linkname | |
linkpath = property(_getlinkpath, _setlinkpath) | |
def __repr__(self): | |
return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) | |
def get_info(self): | |
"""Return the TarInfo's attributes as a dictionary. | |
""" | |
info = { | |
"name": self.name, | |
"mode": self.mode & 0o7777, | |
"uid": self.uid, | |
"gid": self.gid, | |
"size": self.size, | |
"mtime": self.mtime, | |
"chksum": self.chksum, | |
"type": self.type, | |
"linkname": self.linkname, | |
"uname": self.uname, | |
"gname": self.gname, | |
"devmajor": self.devmajor, | |
"devminor": self.devminor | |
} | |
if info["type"] == DIRTYPE and not info["name"].endswith("/"): | |
info["name"] += "/" | |
return info | |
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"): | |
"""Return a tar header as a string of 512 byte blocks. | |
""" | |
info = self.get_info() | |
if format == USTAR_FORMAT: | |
return self.create_ustar_header(info, encoding, errors) | |
elif format == GNU_FORMAT: | |
return self.create_gnu_header(info, encoding, errors) | |
elif format == PAX_FORMAT: | |
return self.create_pax_header(info, encoding) | |
else: | |
raise ValueError("invalid format") | |
def create_ustar_header(self, info, encoding, errors): | |
"""Return the object as a ustar header block. | |
""" | |
info["magic"] = POSIX_MAGIC | |
if len(info["linkname"]) > LENGTH_LINK: | |
raise ValueError("linkname is too long") | |
if len(info["name"]) > LENGTH_NAME: | |
info["prefix"], info["name"] = self._posix_split_name(info["name"]) | |
return self._create_header(info, USTAR_FORMAT, encoding, errors) | |
def create_gnu_header(self, info, encoding, errors): | |
"""Return the object as a GNU header block sequence. | |
""" | |
info["magic"] = GNU_MAGIC | |
buf = b"" | |
if len(info["linkname"]) > LENGTH_LINK: | |
buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) | |
if len(info["name"]) > LENGTH_NAME: | |
buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) | |
return buf + self._create_header(info, GNU_FORMAT, encoding, errors) | |
def create_pax_header(self, info, encoding): | |
"""Return the object as a ustar header block. If it cannot be | |
represented this way, prepend a pax extended header sequence | |
with supplement information. | |
""" | |
info["magic"] = POSIX_MAGIC | |
pax_headers = self.pax_headers.copy() | |
# Test string fields for values that exceed the field length or cannot | |
# be represented in ASCII encoding. | |
for name, hname, length in ( | |
("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), | |
("uname", "uname", 32), ("gname", "gname", 32)): | |
if hname in pax_headers: | |
# The pax header has priority. | |
continue | |
# Try to encode the string as ASCII. | |
try: | |
info[name].encode("ascii", "strict") | |
except UnicodeEncodeError: | |
pax_headers[hname] = info[name] | |
continue | |
if len(info[name]) > length: | |
pax_headers[hname] = info[name] | |
        # Test number fields for values that exceed the field limit or values
        # that need to be stored as floats.
for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): | |
if name in pax_headers: | |
# The pax header has priority. Avoid overflow. | |
info[name] = 0 | |
continue | |
val = info[name] | |
if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): | |
pax_headers[name] = str(val) | |
info[name] = 0 | |
# Create a pax extended header if necessary. | |
if pax_headers: | |
buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding) | |
else: | |
buf = b"" | |
return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace") | |
@classmethod | |
def create_pax_global_header(cls, pax_headers): | |
"""Return the object as a pax global header block sequence. | |
""" | |
return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8") | |
def _posix_split_name(self, name): | |
"""Split a name longer than 100 chars into a prefix | |
and a name part. | |
""" | |
prefix = name[:LENGTH_PREFIX + 1] | |
while prefix and prefix[-1] != "/": | |
prefix = prefix[:-1] | |
name = name[len(prefix):] | |
prefix = prefix[:-1] | |
if not prefix or len(name) > LENGTH_NAME: | |
raise ValueError("name is too long") | |
return prefix, name | |
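    # (Clarifying note, not in the original source: the split happens at the
    # right-most "/" inside the first LENGTH_PREFIX + 1 characters; everything
    # before it goes into the 155-byte ustar "prefix" field, and the remainder
    # must still fit into the 100-byte "name" field, otherwise ValueError is
    # raised.)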
@staticmethod | |
def _create_header(info, format, encoding, errors): | |
"""Return a header block. info is a dictionary with file | |
information, format must be one of the *_FORMAT constants. | |
""" | |
parts = [ | |
stn(info.get("name", ""), 100, encoding, errors), | |
itn(info.get("mode", 0) & 0o7777, 8, format), | |
itn(info.get("uid", 0), 8, format), | |
itn(info.get("gid", 0), 8, format), | |
itn(info.get("size", 0), 12, format), | |
itn(info.get("mtime", 0), 12, format), | |
b" ", # checksum field | |
info.get("type", REGTYPE), | |
stn(info.get("linkname", ""), 100, encoding, errors), | |
info.get("magic", POSIX_MAGIC), | |
stn(info.get("uname", ""), 32, encoding, errors), | |
stn(info.get("gname", ""), 32, encoding, errors), | |
itn(info.get("devmajor", 0), 8, format), | |
itn(info.get("devminor", 0), 8, format), | |
stn(info.get("prefix", ""), 155, encoding, errors) | |
] | |
buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts)) | |
chksum = calc_chksums(buf[-BLOCKSIZE:])[0] | |
buf = buf[:-364] + ("%06o\0" % chksum).encode("ascii") + buf[-357:] | |
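        # (Clarifying note, not in the original source: the 8-byte checksum
        # field occupies bytes 148-155 of the 512-byte header, i.e. it starts
        # 364 bytes from the end; the line above rewrites bytes 148-154 with
        # six octal digits plus a NUL and keeps the trailing space at byte 155.)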
return buf | |
@staticmethod | |
def _create_payload(payload): | |
"""Return the string payload filled with zero bytes | |
up to the next 512 byte border. | |
""" | |
blocks, remainder = divmod(len(payload), BLOCKSIZE) | |
if remainder > 0: | |
payload += (BLOCKSIZE - remainder) * NUL | |
return payload | |
@classmethod | |
def _create_gnu_long_header(cls, name, type, encoding, errors): | |
"""Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence | |
for name. | |
""" | |
name = name.encode(encoding, errors) + NUL | |
info = {} | |
info["name"] = "././@LongLink" | |
info["type"] = type | |
info["size"] = len(name) | |
info["magic"] = GNU_MAGIC | |
# create extended header + name blocks. | |
return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \ | |
cls._create_payload(name) | |
@classmethod | |
def _create_pax_generic_header(cls, pax_headers, type, encoding): | |
"""Return a POSIX.1-2008 extended or global header sequence | |
that contains a list of keyword, value pairs. The values | |
must be strings. | |
""" | |
# Check if one of the fields contains surrogate characters and thereby | |
# forces hdrcharset=BINARY, see _proc_pax() for more information. | |
binary = False | |
for keyword, value in pax_headers.items(): | |
try: | |
value.encode("utf8", "strict") | |
except UnicodeEncodeError: | |
binary = True | |
break | |
records = b"" | |
if binary: | |
# Put the hdrcharset field at the beginning of the header. | |
records += b"21 hdrcharset=BINARY\n" | |
for keyword, value in pax_headers.items(): | |
keyword = keyword.encode("utf8") | |
if binary: | |
# Try to restore the original byte representation of `value'. | |
                # Needless to say, the encoding must match the string.
value = value.encode(encoding, "surrogateescape") | |
else: | |
value = value.encode("utf8") | |
l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' | |
n = p = 0 | |
while True: | |
n = l + len(str(p)) | |
if n == p: | |
break | |
p = n | |
records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n" | |
# We use a hardcoded "././@PaxHeader" name like star does | |
# instead of the one that POSIX recommends. | |
info = {} | |
info["name"] = "././@PaxHeader" | |
info["type"] = type | |
info["size"] = len(records) | |
info["magic"] = POSIX_MAGIC | |
# Create pax header + record blocks. | |
return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \ | |
cls._create_payload(records) | |
@classmethod | |
def frombuf(cls, buf, encoding, errors): | |
"""Construct a TarInfo object from a 512 byte bytes object. | |
""" | |
if len(buf) == 0: | |
raise EmptyHeaderError("empty header") | |
if len(buf) != BLOCKSIZE: | |
raise TruncatedHeaderError("truncated header") | |
if buf.count(NUL) == BLOCKSIZE: | |
raise EOFHeaderError("end of file header") | |
chksum = nti(buf[148:156]) | |
if chksum not in calc_chksums(buf): | |
raise InvalidHeaderError("bad checksum") | |
obj = cls() | |
obj.name = nts(buf[0:100], encoding, errors) | |
obj.mode = nti(buf[100:108]) | |
obj.uid = nti(buf[108:116]) | |
obj.gid = nti(buf[116:124]) | |
obj.size = nti(buf[124:136]) | |
obj.mtime = nti(buf[136:148]) | |
obj.chksum = chksum | |
obj.type = buf[156:157] | |
obj.linkname = nts(buf[157:257], encoding, errors) | |
obj.uname = nts(buf[265:297], encoding, errors) | |
obj.gname = nts(buf[297:329], encoding, errors) | |
obj.devmajor = nti(buf[329:337]) | |
obj.devminor = nti(buf[337:345]) | |
prefix = nts(buf[345:500], encoding, errors) | |
# Old V7 tar format represents a directory as a regular | |
# file with a trailing slash. | |
if obj.type == AREGTYPE and obj.name.endswith("/"): | |
obj.type = DIRTYPE | |
# The old GNU sparse format occupies some of the unused | |
# space in the buffer for up to 4 sparse structures. | |
        # Save them for later processing in _proc_sparse().
if obj.type == GNUTYPE_SPARSE: | |
pos = 386 | |
structs = [] | |
for i in range(4): | |
try: | |
offset = nti(buf[pos:pos + 12]) | |
numbytes = nti(buf[pos + 12:pos + 24]) | |
except ValueError: | |
break | |
structs.append((offset, numbytes)) | |
pos += 24 | |
isextended = bool(buf[482]) | |
origsize = nti(buf[483:495]) | |
obj._sparse_structs = (structs, isextended, origsize) | |
# Remove redundant slashes from directories. | |
if obj.isdir(): | |
obj.name = obj.name.rstrip("/") | |
# Reconstruct a ustar longname. | |
if prefix and obj.type not in GNU_TYPES: | |
obj.name = prefix + "/" + obj.name | |
return obj | |
@classmethod | |
def fromtarfile(cls, tarfile): | |
"""Return the next TarInfo object from TarFile object | |
tarfile. | |
""" | |
buf = tarfile.fileobj.read(BLOCKSIZE) | |
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors) | |
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE | |
return obj._proc_member(tarfile) | |
#-------------------------------------------------------------------------- | |
# The following are methods that are called depending on the type of a | |
# member. The entry point is _proc_member() which can be overridden in a | |
# subclass to add custom _proc_*() methods. A _proc_*() method MUST | |
# implement the following | |
# operations: | |
# 1. Set self.offset_data to the position where the data blocks begin, | |
# if there is data that follows. | |
# 2. Set tarfile.offset to the position where the next member's header will | |
# begin. | |
# 3. Return self or another valid TarInfo object. | |
def _proc_member(self, tarfile): | |
"""Choose the right processing method depending on | |
the type and call it. | |
""" | |
if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): | |
return self._proc_gnulong(tarfile) | |
elif self.type == GNUTYPE_SPARSE: | |
return self._proc_sparse(tarfile) | |
elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): | |
return self._proc_pax(tarfile) | |
else: | |
return self._proc_builtin(tarfile) | |
def _proc_builtin(self, tarfile): | |
"""Process a builtin type or an unknown type which | |
will be treated as a regular file. | |
""" | |
self.offset_data = tarfile.fileobj.tell() | |
offset = self.offset_data | |
if self.isreg() or self.type not in SUPPORTED_TYPES: | |
# Skip the following data blocks. | |
offset += self._block(self.size) | |
tarfile.offset = offset | |
# Patch the TarInfo object with saved global | |
# header information. | |
self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) | |
return self | |
def _proc_gnulong(self, tarfile): | |
"""Process the blocks that hold a GNU longname | |
or longlink member. | |
""" | |
buf = tarfile.fileobj.read(self._block(self.size)) | |
# Fetch the next header and process it. | |
try: | |
next = self.fromtarfile(tarfile) | |
except HeaderError: | |
raise SubsequentHeaderError("missing or bad subsequent header") | |
# Patch the TarInfo object from the next header with | |
# the longname information. | |
next.offset = self.offset | |
if self.type == GNUTYPE_LONGNAME: | |
next.name = nts(buf, tarfile.encoding, tarfile.errors) | |
elif self.type == GNUTYPE_LONGLINK: | |
next.linkname = nts(buf, tarfile.encoding, tarfile.errors) | |
return next | |
def _proc_sparse(self, tarfile): | |
"""Process a GNU sparse header plus extra headers. | |
""" | |
# We already collected some sparse structures in frombuf(). | |
structs, isextended, origsize = self._sparse_structs | |
del self._sparse_structs | |
# Collect sparse structures from extended header blocks. | |
while isextended: | |
buf = tarfile.fileobj.read(BLOCKSIZE) | |
pos = 0 | |
for i in range(21): | |
try: | |
offset = nti(buf[pos:pos + 12]) | |
numbytes = nti(buf[pos + 12:pos + 24]) | |
except ValueError: | |
break | |
if offset and numbytes: | |
structs.append((offset, numbytes)) | |
pos += 24 | |
isextended = bool(buf[504]) | |
self.sparse = structs | |
self.offset_data = tarfile.fileobj.tell() | |
tarfile.offset = self.offset_data + self._block(self.size) | |
self.size = origsize | |
return self | |
def _proc_pax(self, tarfile): | |
"""Process an extended or global header as described in | |
POSIX.1-2008. | |
""" | |
# Read the header information. | |
buf = tarfile.fileobj.read(self._block(self.size)) | |
# A pax header stores supplemental information for either | |
# the following file (extended) or all following files | |
# (global). | |
if self.type == XGLTYPE: | |
pax_headers = tarfile.pax_headers | |
else: | |
pax_headers = tarfile.pax_headers.copy() | |
# Check if the pax header contains a hdrcharset field. This tells us | |
# the encoding of the path, linkpath, uname and gname fields. Normally, | |
        # these fields are UTF-8 encoded, but POSIX.1-2008 allows tar
        # implementations to store them as raw binary strings if the
        # translation to UTF-8 fails.
match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) | |
if match is not None: | |
pax_headers["hdrcharset"] = match.group(1).decode("utf8") | |
# For the time being, we don't care about anything other than "BINARY". | |
# The only other value that is currently allowed by the standard is | |
# "ISO-IR 10646 2000 UTF-8" in other words UTF-8. | |
hdrcharset = pax_headers.get("hdrcharset") | |
if hdrcharset == "BINARY": | |
encoding = tarfile.encoding | |
else: | |
encoding = "utf8" | |
# Parse pax header information. A record looks like that: | |
# "%d %s=%s\n" % (length, keyword, value). length is the size | |
# of the complete record including the length field itself and | |
# the newline. keyword and value are both UTF-8 encoded strings. | |
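        # Worked example (added for clarity, not in the original source): the
        # record b"25 ctime=1084839148.1212\n" has keyword "ctime", value
        # "1084839148.1212" and a total length of 25 bytes, counting the
        # leading length field itself and the trailing newline.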
regex = re.compile(br"(\d+) ([^=]+)=") | |
pos = 0 | |
while True: | |
match = regex.match(buf, pos) | |
if not match: | |
break | |
length, keyword = match.groups() | |
length = int(length) | |
value = buf[match.end(2) + 1:match.start(1) + length - 1] | |
# Normally, we could just use "utf8" as the encoding and "strict" | |
# as the error handler, but we better not take the risk. For | |
# example, GNU tar <= 1.23 is known to store filenames it cannot | |
# translate to UTF-8 as raw strings (unfortunately without a | |
# hdrcharset=BINARY header). | |
# We first try the strict standard encoding, and if that fails we | |
# fall back on the user's encoding and error handler. | |
keyword = self._decode_pax_field(keyword, "utf8", "utf8", | |
tarfile.errors) | |
if keyword in PAX_NAME_FIELDS: | |
value = self._decode_pax_field(value, encoding, tarfile.encoding, | |
tarfile.errors) | |
else: | |
value = self._decode_pax_field(value, "utf8", "utf8", | |
tarfile.errors) | |
pax_headers[keyword] = value | |
pos += length | |
# Fetch the next header. | |
try: | |
next = self.fromtarfile(tarfile) | |
except HeaderError: | |
raise SubsequentHeaderError("missing or bad subsequent header") | |
# Process GNU sparse information. | |
if "GNU.sparse.map" in pax_headers: | |
# GNU extended sparse format version 0.1. | |
self._proc_gnusparse_01(next, pax_headers) | |
elif "GNU.sparse.size" in pax_headers: | |
# GNU extended sparse format version 0.0. | |
self._proc_gnusparse_00(next, pax_headers, buf) | |
elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": | |
# GNU extended sparse format version 1.0. | |
self._proc_gnusparse_10(next, pax_headers, tarfile) | |
if self.type in (XHDTYPE, SOLARIS_XHDTYPE): | |
# Patch the TarInfo object with the extended header info. | |
next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) | |
next.offset = self.offset | |
if "size" in pax_headers: | |
# If the extended header replaces the size field, | |
# we need to recalculate the offset where the next | |
# header starts. | |
offset = next.offset_data | |
if next.isreg() or next.type not in SUPPORTED_TYPES: | |
offset += next._block(next.size) | |
tarfile.offset = offset | |
return next | |
def _proc_gnusparse_00(self, next, pax_headers, buf): | |
"""Process a GNU tar extended sparse header, version 0.0. | |
""" | |
offsets = [] | |
for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): | |
offsets.append(int(match.group(1))) | |
numbytes = [] | |
for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): | |
numbytes.append(int(match.group(1))) | |
next.sparse = list(zip(offsets, numbytes)) | |
def _proc_gnusparse_01(self, next, pax_headers): | |
"""Process a GNU tar extended sparse header, version 0.1. | |
""" | |
sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")] | |
next.sparse = list(zip(sparse[::2], sparse[1::2])) | |
def _proc_gnusparse_10(self, next, pax_headers, tarfile): | |
"""Process a GNU tar extended sparse header, version 1.0. | |
""" | |
fields = None | |
sparse = [] | |
buf = tarfile.fileobj.read(BLOCKSIZE) | |
fields, buf = buf.split(b"\n", 1) | |
fields = int(fields) | |
while len(sparse) < fields * 2: | |
if b"\n" not in buf: | |
buf += tarfile.fileobj.read(BLOCKSIZE) | |
number, buf = buf.split(b"\n", 1) | |
sparse.append(int(number)) | |
next.offset_data = tarfile.fileobj.tell() | |
next.sparse = list(zip(sparse[::2], sparse[1::2])) | |
def _apply_pax_info(self, pax_headers, encoding, errors): | |
"""Replace fields with supplemental information from a previous | |
pax extended or global header. | |
""" | |
for keyword, value in pax_headers.items(): | |
if keyword == "GNU.sparse.name": | |
setattr(self, "path", value) | |
elif keyword == "GNU.sparse.size": | |
setattr(self, "size", int(value)) | |
elif keyword == "GNU.sparse.realsize": | |
setattr(self, "size", int(value)) | |
elif keyword in PAX_FIELDS: | |
if keyword in PAX_NUMBER_FIELDS: | |
try: | |
value = PAX_NUMBER_FIELDS[keyword](value) | |
except ValueError: | |
value = 0 | |
if keyword == "path": | |
value = value.rstrip("/") | |
setattr(self, keyword, value) | |
self.pax_headers = pax_headers.copy() | |
def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors): | |
"""Decode a single field from a pax record. | |
""" | |
try: | |
return value.decode(encoding, "strict") | |
except UnicodeDecodeError: | |
return value.decode(fallback_encoding, fallback_errors) | |
def _block(self, count): | |
"""Round up a byte count by BLOCKSIZE and return it, | |
e.g. _block(834) => 1024. | |
""" | |
blocks, remainder = divmod(count, BLOCKSIZE) | |
if remainder: | |
blocks += 1 | |
return blocks * BLOCKSIZE | |
def isreg(self): | |
return self.type in REGULAR_TYPES | |
def isfile(self): | |
return self.isreg() | |
def isdir(self): | |
return self.type == DIRTYPE | |
def issym(self): | |
return self.type == SYMTYPE | |
def islnk(self): | |
return self.type == LNKTYPE | |
def ischr(self): | |
return self.type == CHRTYPE | |
def isblk(self): | |
return self.type == BLKTYPE | |
def isfifo(self): | |
return self.type == FIFOTYPE | |
def issparse(self): | |
return self.sparse is not None | |
def isdev(self): | |
return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) | |
# class TarInfo | |
class TarFile(object): | |
"""The TarFile Class provides an interface to tar archives. | |
""" | |
debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) | |
dereference = False # If true, add content of linked file to the | |
# tar file, else the link. | |
ignore_zeros = False # If true, skips empty or invalid blocks and | |
# continues processing. | |
errorlevel = 1 # If 0, fatal errors only appear in debug | |
# messages (if debug >= 0). If > 0, errors | |
# are passed to the caller as exceptions. | |
format = DEFAULT_FORMAT # The format to use when creating an archive. | |
encoding = ENCODING # Encoding for 8-bit character strings. | |
errors = None # Error handler for unicode conversion. | |
tarinfo = TarInfo # The default TarInfo class to use. | |
fileobject = ExFileObject # The default ExFileObject class to use. | |
def __init__(self, name=None, mode="r", fileobj=None, format=None, | |
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, | |
errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None): | |
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to | |
read from an existing archive, 'a' to append data to an existing | |
file or 'w' to create a new file overwriting an existing one. `mode' | |
defaults to 'r'. | |
If `fileobj' is given, it is used for reading or writing data. If it | |
can be determined, `mode' is overridden by `fileobj's mode. | |
`fileobj' is not closed when the TarFile is closed.
""" | |
if len(mode) > 1 or mode not in "raw": | |
raise ValueError("mode must be 'r', 'a' or 'w'") | |
self.mode = mode | |
self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode] | |
if not fileobj: | |
if self.mode == "a" and not os.path.exists(name): | |
# Create nonexistent files in append mode. | |
self.mode = "w" | |
self._mode = "wb" | |
fileobj = bltn_open(name, self._mode) | |
self._extfileobj = False | |
else: | |
if name is None and hasattr(fileobj, "name"): | |
name = fileobj.name | |
if hasattr(fileobj, "mode"): | |
self._mode = fileobj.mode | |
self._extfileobj = True | |
self.name = os.path.abspath(name) if name else None | |
self.fileobj = fileobj | |
# Init attributes. | |
if format is not None: | |
self.format = format | |
if tarinfo is not None: | |
self.tarinfo = tarinfo | |
if dereference is not None: | |
self.dereference = dereference | |
if ignore_zeros is not None: | |
self.ignore_zeros = ignore_zeros | |
if encoding is not None: | |
self.encoding = encoding | |
self.errors = errors | |
if pax_headers is not None and self.format == PAX_FORMAT: | |
self.pax_headers = pax_headers | |
else: | |
self.pax_headers = {} | |
if debug is not None: | |
self.debug = debug | |
if errorlevel is not None: | |
self.errorlevel = errorlevel | |
# Init datastructures. | |
self.closed = False | |
self.members = [] # list of members as TarInfo objects | |
self._loaded = False # flag if all members have been read | |
self.offset = self.fileobj.tell() | |
# current position in the archive file | |
self.inodes = {} # dictionary caching the inodes of | |
# archive members already added | |
try: | |
if self.mode == "r": | |
self.firstmember = None | |
self.firstmember = self.next() | |
if self.mode == "a": | |
# Move to the end of the archive, | |
# before the first empty block. | |
while True: | |
self.fileobj.seek(self.offset) | |
try: | |
tarinfo = self.tarinfo.fromtarfile(self) | |
self.members.append(tarinfo) | |
except EOFHeaderError: | |
self.fileobj.seek(self.offset) | |
break | |
except HeaderError as e: | |
raise ReadError(str(e)) | |
if self.mode in "aw": | |
self._loaded = True | |
if self.pax_headers: | |
buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) | |
self.fileobj.write(buf) | |
self.offset += len(buf) | |
except: | |
if not self._extfileobj: | |
self.fileobj.close() | |
self.closed = True | |
raise | |
#-------------------------------------------------------------------------- | |
# Below are the classmethods which act as alternate constructors to the | |
# TarFile class. The open() method is the only one that is needed for | |
# public use; it is the "super"-constructor and is able to select an | |
# adequate "sub"-constructor for a particular compression using the mapping | |
# from OPEN_METH. | |
# | |
# This concept allows one to subclass TarFile without losing the comfort of | |
# the super-constructor. A sub-constructor is registered and made available | |
# by adding it to the mapping in OPEN_METH. | |
@classmethod | |
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): | |
"""Open a tar archive for reading, writing or appending. Return | |
an appropriate TarFile class. | |
mode: | |
'r' or 'r:*' open for reading with transparent compression | |
'r:' open for reading exclusively uncompressed | |
'r:gz' open for reading with gzip compression | |
'r:bz2' open for reading with bzip2 compression | |
'a' or 'a:' open for appending, creating the file if necessary | |
'w' or 'w:' open for writing without compression | |
'w:gz' open for writing with gzip compression | |
'w:bz2' open for writing with bzip2 compression | |
'r|*' open a stream of tar blocks with transparent compression | |
'r|' open an uncompressed stream of tar blocks for reading | |
'r|gz' open a gzip compressed stream of tar blocks | |
'r|bz2' open a bzip2 compressed stream of tar blocks | |
'w|' open an uncompressed stream for writing | |
'w|gz' open a gzip compressed stream for writing | |
'w|bz2' open a bzip2 compressed stream for writing | |
""" | |
if not name and not fileobj: | |
raise ValueError("nothing to open") | |
if mode in ("r", "r:*"): | |
# Find out which *open() is appropriate for opening the file. | |
for comptype in cls.OPEN_METH: | |
func = getattr(cls, cls.OPEN_METH[comptype]) | |
if fileobj is not None: | |
saved_pos = fileobj.tell() | |
try: | |
return func(name, "r", fileobj, **kwargs) | |
except (ReadError, CompressionError) as e: | |
if fileobj is not None: | |
fileobj.seek(saved_pos) | |
continue | |
raise ReadError("file could not be opened successfully") | |
elif ":" in mode: | |
filemode, comptype = mode.split(":", 1) | |
filemode = filemode or "r" | |
comptype = comptype or "tar" | |
# Select the *open() function according to | |
# given compression. | |
if comptype in cls.OPEN_METH: | |
func = getattr(cls, cls.OPEN_METH[comptype]) | |
else: | |
raise CompressionError("unknown compression type %r" % comptype) | |
return func(name, filemode, fileobj, **kwargs) | |
elif "|" in mode: | |
filemode, comptype = mode.split("|", 1) | |
filemode = filemode or "r" | |
comptype = comptype or "tar" | |
if filemode not in "rw": | |
raise ValueError("mode must be 'r' or 'w'") | |
stream = _Stream(name, filemode, comptype, fileobj, bufsize) | |
try: | |
t = cls(name, filemode, stream, **kwargs) | |
except: | |
stream.close() | |
raise | |
t._extfileobj = False | |
return t | |
elif mode in "aw": | |
return cls.taropen(name, mode, fileobj, **kwargs) | |
raise ValueError("undiscernible mode") | |
@classmethod | |
def taropen(cls, name, mode="r", fileobj=None, **kwargs): | |
"""Open uncompressed tar archive name for reading or writing. | |
""" | |
if len(mode) > 1 or mode not in "raw": | |
raise ValueError("mode must be 'r', 'a' or 'w'") | |
return cls(name, mode, fileobj, **kwargs) | |
@classmethod | |
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): | |
"""Open gzip compressed tar archive name for reading or writing. | |
Appending is not allowed. | |
""" | |
if len(mode) > 1 or mode not in "rw": | |
raise ValueError("mode must be 'r' or 'w'") | |
try: | |
import gzip | |
gzip.GzipFile | |
except (ImportError, AttributeError): | |
raise CompressionError("gzip module is not available") | |
extfileobj = fileobj is not None | |
try: | |
fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj) | |
t = cls.taropen(name, mode, fileobj, **kwargs) | |
except IOError: | |
if not extfileobj and fileobj is not None: | |
fileobj.close() | |
if fileobj is None: | |
raise | |
raise ReadError("not a gzip file") | |
except: | |
if not extfileobj and fileobj is not None: | |
fileobj.close() | |
raise | |
t._extfileobj = extfileobj | |
return t | |
@classmethod | |
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): | |
"""Open bzip2 compressed tar archive name for reading or writing. | |
Appending is not allowed. | |
""" | |
if len(mode) > 1 or mode not in "rw": | |
raise ValueError("mode must be 'r' or 'w'.") | |
try: | |
import bz2 | |
except ImportError: | |
raise CompressionError("bz2 module is not available") | |
if fileobj is not None: | |
fileobj = _BZ2Proxy(fileobj, mode) | |
else: | |
fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) | |
try: | |
t = cls.taropen(name, mode, fileobj, **kwargs) | |
except (IOError, EOFError): | |
fileobj.close() | |
raise ReadError("not a bzip2 file") | |
t._extfileobj = False | |
return t | |
# All *open() methods are registered here. | |
OPEN_METH = { | |
"tar": "taropen", # uncompressed tar | |
"gz": "gzopen", # gzip compressed tar | |
"bz2": "bz2open" # bzip2 compressed tar | |
} | |
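# --- Editor's note: extension sketch, not part of the original module -------
# As the comment above explains, a subclass can register an extra
# sub-constructor by extending OPEN_METH. The "xz" entry and "xzopen" method
# below are hypothetical and assume the standard lzma module is available:
#
#     class XZTarFile(TarFile):
#         OPEN_METH = dict(TarFile.OPEN_METH, xz="xzopen")
#
#         @classmethod
#         def xzopen(cls, name, mode="r", fileobj=None, **kwargs):
#             import lzma                      # assumption: lzma is importable
#             fileobj = lzma.LZMAFile(fileobj or name, mode + "b")
#             return cls.taropen(name, mode, fileobj, **kwargs)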
#-------------------------------------------------------------------------- | |
# The public methods which TarFile provides: | |
def close(self): | |
"""Close the TarFile. In write-mode, two finishing zero blocks are | |
appended to the archive. | |
""" | |
if self.closed: | |
return | |
if self.mode in "aw": | |
self.fileobj.write(NUL * (BLOCKSIZE * 2)) | |
self.offset += (BLOCKSIZE * 2) | |
# fill up the end with zero-blocks | |
# (like option -b20 for tar does) | |
blocks, remainder = divmod(self.offset, RECORDSIZE) | |
if remainder > 0: | |
self.fileobj.write(NUL * (RECORDSIZE - remainder)) | |
if not self._extfileobj: | |
self.fileobj.close() | |
self.closed = True | |
def getmember(self, name): | |
"""Return a TarInfo object for member `name'. If `name' can not be | |
found in the archive, KeyError is raised. If a member occurs more | |
than once in the archive, its last occurrence is assumed to be the | |
most up-to-date version. | |
""" | |
tarinfo = self._getmember(name) | |
if tarinfo is None: | |
raise KeyError("filename %r not found" % name) | |
return tarinfo | |
def getmembers(self): | |
"""Return the members of the archive as a list of TarInfo objects. The | |
list has the same order as the members in the archive. | |
""" | |
self._check() | |
if not self._loaded: # if we want to obtain a list of | |
self._load() # all members, we first have to | |
# scan the whole archive. | |
return self.members | |
def getnames(self): | |
"""Return the members of the archive as a list of their names. It has | |
the same order as the list returned by getmembers(). | |
""" | |
return [tarinfo.name for tarinfo in self.getmembers()] | |
def gettarinfo(self, name=None, arcname=None, fileobj=None): | |
"""Create a TarInfo object for either the file `name' or the file | |
object `fileobj' (using os.fstat on its file descriptor). You can | |
modify some of the TarInfo's attributes before you add it using | |
addfile(). If given, `arcname' specifies an alternative name for the | |
file in the archive. | |
""" | |
self._check("aw") | |
# When fileobj is given, replace name by | |
# fileobj's real name. | |
if fileobj is not None: | |
name = fileobj.name | |
# Build the name of the member in the archive:
# backward slashes are converted to forward slashes,
# and absolute paths are turned into relative paths.
if arcname is None: | |
arcname = name | |
drv, arcname = os.path.splitdrive(arcname) | |
arcname = arcname.replace(os.sep, "/") | |
arcname = arcname.lstrip("/") | |
# Now, fill the TarInfo object with | |
# information specific for the file. | |
tarinfo = self.tarinfo() | |
tarinfo.tarfile = self | |
# Use os.stat or os.lstat, depending on platform | |
# and if symlinks shall be resolved. | |
if fileobj is None: | |
if hasattr(os, "lstat") and not self.dereference: | |
statres = os.lstat(name) | |
else: | |
statres = os.stat(name) | |
else: | |
statres = os.fstat(fileobj.fileno()) | |
linkname = "" | |
stmd = statres.st_mode | |
if stat.S_ISREG(stmd): | |
inode = (statres.st_ino, statres.st_dev) | |
if not self.dereference and statres.st_nlink > 1 and \ | |
inode in self.inodes and arcname != self.inodes[inode]: | |
# Is it a hardlink to an already | |
# archived file? | |
type = LNKTYPE | |
linkname = self.inodes[inode] | |
else: | |
# The inode is added only if it is valid.
# For win32 it is always 0. | |
type = REGTYPE | |
if inode[0]: | |
self.inodes[inode] = arcname | |
elif stat.S_ISDIR(stmd): | |
type = DIRTYPE | |
elif stat.S_ISFIFO(stmd): | |
type = FIFOTYPE | |
elif stat.S_ISLNK(stmd): | |
type = SYMTYPE | |
linkname = os.readlink(name) | |
elif stat.S_ISCHR(stmd): | |
type = CHRTYPE | |
elif stat.S_ISBLK(stmd): | |
type = BLKTYPE | |
else: | |
return None | |
# Fill the TarInfo object with all | |
# information we can get. | |
tarinfo.name = arcname | |
tarinfo.mode = stmd | |
tarinfo.uid = statres.st_uid | |
tarinfo.gid = statres.st_gid | |
if type == REGTYPE: | |
tarinfo.size = statres.st_size | |
else: | |
tarinfo.size = 0 | |
tarinfo.mtime = statres.st_mtime | |
tarinfo.type = type | |
tarinfo.linkname = linkname | |
if pwd: | |
try: | |
tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] | |
except KeyError: | |
pass | |
if grp: | |
try: | |
tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] | |
except KeyError: | |
pass | |
if type in (CHRTYPE, BLKTYPE): | |
if hasattr(os, "major") and hasattr(os, "minor"): | |
tarinfo.devmajor = os.major(statres.st_rdev) | |
tarinfo.devminor = os.minor(statres.st_rdev) | |
return tarinfo | |
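# --- Editor's note: usage sketch, not part of the original module ----------
# gettarinfo() returns a TarInfo that may be adjusted before it is written
# with addfile(), e.g. to normalize ownership. The file names below are
# hypothetical:
#
#     tf = TarFile.open("out.tar", "w")
#     ti = tf.gettarinfo("data.txt", arcname="pkg/data.txt")
#     ti.uid = ti.gid = 0
#     ti.uname = ti.gname = "root"
#     with open("data.txt", "rb") as f:
#         tf.addfile(ti, f)
#     tf.close()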
def list(self, verbose=True): | |
"""Print a table of contents to sys.stdout. If `verbose' is False, only | |
the names of the members are printed. If it is True, an `ls -l'-like | |
output is produced. | |
""" | |
self._check() | |
for tarinfo in self: | |
if verbose: | |
print(filemode(tarinfo.mode), end=' ') | |
print("%s/%s" % (tarinfo.uname or tarinfo.uid, | |
tarinfo.gname or tarinfo.gid), end=' ') | |
if tarinfo.ischr() or tarinfo.isblk(): | |
print("%10s" % ("%d,%d" \ | |
% (tarinfo.devmajor, tarinfo.devminor)), end=' ') | |
else: | |
print("%10d" % tarinfo.size, end=' ') | |
print("%d-%02d-%02d %02d:%02d:%02d" \ | |
% time.localtime(tarinfo.mtime)[:6], end=' ') | |
print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ') | |
if verbose: | |
if tarinfo.issym(): | |
print("->", tarinfo.linkname, end=' ') | |
if tarinfo.islnk(): | |
print("link to", tarinfo.linkname, end=' ') | |
print() | |
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): | |
"""Add the file `name' to the archive. `name' may be any type of file | |
(directory, fifo, symbolic link, etc.). If given, `arcname' | |
specifies an alternative name for the file in the archive. | |
Directories are added recursively by default. This can be avoided by | |
setting `recursive' to False. `exclude' is a function that should | |
return True for each filename to be excluded. `filter' is a function | |
that expects a TarInfo object argument and returns the changed | |
TarInfo object, if it returns None the TarInfo object will be | |
excluded from the archive. | |
""" | |
self._check("aw") | |
if arcname is None: | |
arcname = name | |
# Exclude pathnames. | |
if exclude is not None: | |
import warnings | |
warnings.warn("use the filter argument instead", | |
DeprecationWarning, 2) | |
if exclude(name): | |
self._dbg(2, "tarfile: Excluded %r" % name) | |
return | |
# Skip if somebody tries to archive the archive... | |
if self.name is not None and os.path.abspath(name) == self.name: | |
self._dbg(2, "tarfile: Skipped %r" % name) | |
return | |
self._dbg(1, name) | |
# Create a TarInfo object from the file. | |
tarinfo = self.gettarinfo(name, arcname) | |
if tarinfo is None: | |
self._dbg(1, "tarfile: Unsupported type %r" % name) | |
return | |
# Change or exclude the TarInfo object. | |
if filter is not None: | |
tarinfo = filter(tarinfo) | |
if tarinfo is None: | |
self._dbg(2, "tarfile: Excluded %r" % name) | |
return | |
# Append the tar header and data to the archive. | |
if tarinfo.isreg(): | |
f = bltn_open(name, "rb") | |
self.addfile(tarinfo, f) | |
f.close() | |
elif tarinfo.isdir(): | |
self.addfile(tarinfo) | |
if recursive: | |
for f in os.listdir(name): | |
self.add(os.path.join(name, f), os.path.join(arcname, f), | |
recursive, exclude, filter=filter) | |
else: | |
self.addfile(tarinfo) | |
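# --- Editor's note: usage sketch, not part of the original module ----------
# The `filter' argument documented above can drop or rewrite members while a
# tree is added recursively; the directory "src" and the archive name are
# hypothetical:
#
#     def _strip_owner(tarinfo):
#         if tarinfo.name.endswith(".pyc"):
#             return None                # returning None excludes the member
#         tarinfo.uid = tarinfo.gid = 0  # otherwise rewrite it before archiving
#         return tarinfo
#
#     with TarFile.open("src.tar", "w") as tf:
#         tf.add("src", filter=_strip_owner)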
def addfile(self, tarinfo, fileobj=None): | |
"""Add the TarInfo object `tarinfo' to the archive. If `fileobj' is | |
given, tarinfo.size bytes are read from it and added to the archive. | |
You can create TarInfo objects using gettarinfo(). | |
On Windows platforms, `fileobj' should always be opened with mode | |
'rb' to avoid problems with newline translation distorting the file size.
""" | |
self._check("aw") | |
tarinfo = copy.copy(tarinfo) | |
buf = tarinfo.tobuf(self.format, self.encoding, self.errors) | |
self.fileobj.write(buf) | |
self.offset += len(buf) | |
# If there's data to follow, append it. | |
if fileobj is not None: | |
copyfileobj(fileobj, self.fileobj, tarinfo.size) | |
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) | |
if remainder > 0: | |
self.fileobj.write(NUL * (BLOCKSIZE - remainder)) | |
blocks += 1 | |
self.offset += blocks * BLOCKSIZE | |
self.members.append(tarinfo) | |
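# --- Editor's note: usage sketch, not part of the original module ----------
# addfile() also accepts a hand-built TarInfo plus any readable file object,
# which allows archiving in-memory data; the member and archive names are
# hypothetical:
#
#     import io, time
#     payload = b"hello"
#     with TarFile.open("out.tar", "w") as tf:
#         ti = TarInfo("generated/hello.txt")
#         ti.size = len(payload)
#         ti.mtime = time.time()
#         tf.addfile(ti, io.BytesIO(payload))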
def extractall(self, path=".", members=None): | |
"""Extract all members from the archive to the current working | |
directory and set owner, modification time and permissions on | |
directories afterwards. `path' specifies a different directory | |
to extract to. `members' is optional and must be a subset of the | |
list returned by getmembers(). | |
""" | |
directories = [] | |
if members is None: | |
members = self | |
for tarinfo in members: | |
if tarinfo.isdir(): | |
# Extract directories with a safe mode. | |
directories.append(tarinfo) | |
tarinfo = copy.copy(tarinfo) | |
tarinfo.mode = 0o700 | |
# Do not set_attrs directories, as we will do that further down | |
self.extract(tarinfo, path, set_attrs=not tarinfo.isdir()) | |
# Reverse sort directories. | |
directories.sort(key=lambda a: a.name) | |
directories.reverse() | |
# Set correct owner, mtime and filemode on directories. | |
for tarinfo in directories: | |
dirpath = os.path.join(path, tarinfo.name) | |
try: | |
self.chown(tarinfo, dirpath) | |
self.utime(tarinfo, dirpath) | |
self.chmod(tarinfo, dirpath) | |
except ExtractError as e: | |
if self.errorlevel > 1: | |
raise | |
else: | |
self._dbg(1, "tarfile: %s" % e) | |
def extract(self, member, path="", set_attrs=True): | |
"""Extract a member from the archive to the current working directory, | |
using its full name. Its file information is extracted as accurately | |
as possible. `member' may be a filename or a TarInfo object. You can | |
specify a different directory using `path'. File attributes (owner, | |
mtime, mode) are set unless `set_attrs' is False. | |
""" | |
self._check("r") | |
if isinstance(member, str): | |
tarinfo = self.getmember(member) | |
else: | |
tarinfo = member | |
# Prepare the link target for makelink(). | |
if tarinfo.islnk(): | |
tarinfo._link_target = os.path.join(path, tarinfo.linkname) | |
try: | |
self._extract_member(tarinfo, os.path.join(path, tarinfo.name), | |
set_attrs=set_attrs) | |
except EnvironmentError as e: | |
if self.errorlevel > 0: | |
raise | |
else: | |
if e.filename is None: | |
self._dbg(1, "tarfile: %s" % e.strerror) | |
else: | |
self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) | |
except ExtractError as e: | |
if self.errorlevel > 1: | |
raise | |
else: | |
self._dbg(1, "tarfile: %s" % e) | |
def extractfile(self, member): | |
"""Extract a member from the archive as a file object. `member' may be | |
a filename or a TarInfo object. If `member' is a regular file, a | |
file-like object is returned. If `member' is a link, a file-like | |
object is constructed from the link's target. If `member' is none of | |
the above, None is returned. | |
The file-like object is read-only and provides the following | |
methods: read(), readline(), readlines(), seek() and tell() | |
""" | |
self._check("r") | |
if isinstance(member, str): | |
tarinfo = self.getmember(member) | |
else: | |
tarinfo = member | |
if tarinfo.isreg(): | |
return self.fileobject(self, tarinfo) | |
elif tarinfo.type not in SUPPORTED_TYPES: | |
# If a member's type is unknown, it is treated as a | |
# regular file. | |
return self.fileobject(self, tarinfo) | |
elif tarinfo.islnk() or tarinfo.issym(): | |
if isinstance(self.fileobj, _Stream): | |
# A small but ugly workaround for the case that someone tries | |
# to extract a (sym)link as a file-object from a non-seekable | |
# stream of tar blocks. | |
raise StreamError("cannot extract (sym)link as file object") | |
else: | |
# A (sym)link's file object is its target's file object. | |
return self.extractfile(self._find_link_target(tarinfo)) | |
else: | |
# If there's no data associated with the member (directory, chrdev, | |
# blkdev, etc.), return None instead of a file object. | |
return None | |
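# --- Editor's note: usage sketch, not part of the original module ----------
# extractfile() yields a read-only file object for regular members (and, for
# links, their targets); the archive and member names are hypothetical:
#
#     with TarFile.open("example.tar", "r") as tf:
#         f = tf.extractfile("docs/readme.txt")
#         if f is not None:              # None for directories, devices, etc.
#             data = f.read()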
def _extract_member(self, tarinfo, targetpath, set_attrs=True): | |
"""Extract the TarInfo object tarinfo to a physical | |
file called targetpath. | |
""" | |
# Fetch the TarInfo object for the given name | |
# and build the destination pathname, replacing | |
# forward slashes with platform-specific separators.
targetpath = targetpath.rstrip("/") | |
targetpath = targetpath.replace("/", os.sep) | |
# Create all upper directories. | |
upperdirs = os.path.dirname(targetpath) | |
if upperdirs and not os.path.exists(upperdirs): | |
# Create directories that are not part of the archive with | |
# default permissions. | |
os.makedirs(upperdirs) | |
if tarinfo.islnk() or tarinfo.issym(): | |
self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) | |
else: | |
self._dbg(1, tarinfo.name) | |
if tarinfo.isreg(): | |
self.makefile(tarinfo, targetpath) | |
elif tarinfo.isdir(): | |
self.makedir(tarinfo, targetpath) | |
elif tarinfo.isfifo(): | |
self.makefifo(tarinfo, targetpath) | |
elif tarinfo.ischr() or tarinfo.isblk(): | |
self.makedev(tarinfo, targetpath) | |
elif tarinfo.islnk() or tarinfo.issym(): | |
self.makelink(tarinfo, targetpath) | |
elif tarinfo.type not in SUPPORTED_TYPES: | |
self.makeunknown(tarinfo, targetpath) | |
else: | |
self.makefile(tarinfo, targetpath) | |
if set_attrs: | |
self.chown(tarinfo, targetpath) | |
if not tarinfo.issym(): | |
self.chmod(tarinfo, targetpath) | |
self.utime(tarinfo, targetpath) | |
#-------------------------------------------------------------------------- | |
# Below are the different file methods. They are called via | |
# _extract_member() when extract() is called. They can be replaced in a | |
# subclass to implement other functionality. | |
def makedir(self, tarinfo, targetpath): | |
"""Make a directory called targetpath. | |
""" | |
try: | |
# Use a safe mode for the directory, the real mode is set | |
# later in _extract_member(). | |
os.mkdir(targetpath, 0o700) | |
except EnvironmentError as e: | |
if e.errno != errno.EEXIST: | |
raise | |
def makefile(self, tarinfo, targetpath): | |
"""Make a file called targetpath. | |
""" | |
source = self.fileobj | |
source.seek(tarinfo.offset_data) | |
target = bltn_open(targetpath, "wb") | |
if tarinfo.sparse is not None: | |
for offset, size in tarinfo.sparse: | |
target.seek(offset) | |
copyfileobj(source, target, size) | |
else: | |
copyfileobj(source, target, tarinfo.size) | |
target.seek(tarinfo.size) | |
target.truncate() | |
target.close() | |
def makeunknown(self, tarinfo, targetpath): | |
"""Make a file from a TarInfo object with an unknown type | |
at targetpath. | |
""" | |
self.makefile(tarinfo, targetpath) | |
self._dbg(1, "tarfile: Unknown file type %r, " \ | |
"extracted as regular file." % tarinfo.type) | |
def makefifo(self, tarinfo, targetpath): | |
"""Make a fifo called targetpath. | |
""" | |
if hasattr(os, "mkfifo"): | |
os.mkfifo(targetpath) | |
else: | |
raise ExtractError("fifo not supported by system") | |
def makedev(self, tarinfo, targetpath): | |
"""Make a character or block device called targetpath. | |
""" | |
if not hasattr(os, "mknod") or not hasattr(os, "makedev"): | |
raise ExtractError("special devices not supported by system") | |
mode = tarinfo.mode | |
if tarinfo.isblk(): | |
mode |= stat.S_IFBLK | |
else: | |
mode |= stat.S_IFCHR | |
os.mknod(targetpath, mode, | |
os.makedev(tarinfo.devmajor, tarinfo.devminor)) | |
def makelink(self, tarinfo, targetpath): | |
"""Make a (symbolic) link called targetpath. If it cannot be created | |
(platform limitation), we try to make a copy of the referenced file | |
instead of a link. | |
""" | |
try: | |
# For systems that support symbolic and hard links. | |
if tarinfo.issym(): | |
os.symlink(tarinfo.linkname, targetpath) | |
else: | |
# See extract(). | |
if os.path.exists(tarinfo._link_target): | |
os.link(tarinfo._link_target, targetpath) | |
else: | |
self._extract_member(self._find_link_target(tarinfo), | |
targetpath) | |
except symlink_exception:
# If the link could not be created (platform limitation), fall back
# to extracting a copy of the referenced member instead.
try:
self._extract_member(self._find_link_target(tarinfo),
targetpath)
except KeyError:
raise ExtractError("unable to resolve link inside archive")
def chown(self, tarinfo, targetpath): | |
"""Set owner of targetpath according to tarinfo. | |
""" | |
if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: | |
# We have to be root to do so. | |
try: | |
g = grp.getgrnam(tarinfo.gname)[2] | |
except KeyError: | |
g = tarinfo.gid | |
try: | |
u = pwd.getpwnam(tarinfo.uname)[2] | |
except KeyError: | |
u = tarinfo.uid | |
try: | |
if tarinfo.issym() and hasattr(os, "lchown"): | |
os.lchown(targetpath, u, g) | |
else: | |
if sys.platform != "os2emx": | |
os.chown(targetpath, u, g) | |
except EnvironmentError as e: | |
raise ExtractError("could not change owner") | |
def chmod(self, tarinfo, targetpath): | |
"""Set file permissions of targetpath according to tarinfo. | |
""" | |
if hasattr(os, 'chmod'): | |
try: | |
os.chmod(targetpath, tarinfo.mode) | |
except EnvironmentError as e: | |
raise ExtractError("could not change mode") | |
def utime(self, tarinfo, targetpath): | |
"""Set modification time of targetpath according to tarinfo. | |
""" | |
if not hasattr(os, 'utime'): | |
return | |
try: | |
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) | |
except EnvironmentError as e: | |
raise ExtractError("could not change modification time") | |
#-------------------------------------------------------------------------- | |
def next(self): | |
"""Return the next member of the archive as a TarInfo object, when | |
TarFile is opened for reading. Return None if there are no more
members available.
""" | |
self._check("ra") | |
if self.firstmember is not None: | |
m = self.firstmember | |
self.firstmember = None | |
return m | |
# Read the next block. | |
self.fileobj.seek(self.offset) | |
tarinfo = None | |
while True: | |
try: | |
tarinfo = self.tarinfo.fromtarfile(self) | |
except EOFHeaderError as e: | |
if self.ignore_zeros: | |
self._dbg(2, "0x%X: %s" % (self.offset, e)) | |
self.offset += BLOCKSIZE | |
continue | |
except InvalidHeaderError as e: | |
if self.ignore_zeros: | |
self._dbg(2, "0x%X: %s" % (self.offset, e)) | |
self.offset += BLOCKSIZE | |
continue | |
elif self.offset == 0: | |
raise ReadError(str(e)) | |
except EmptyHeaderError: | |
if self.offset == 0: | |
raise ReadError("empty file") | |
except TruncatedHeaderError as e: | |
if self.offset == 0: | |
raise ReadError(str(e)) | |
except SubsequentHeaderError as e: | |
raise ReadError(str(e)) | |
break | |
if tarinfo is not None: | |
self.members.append(tarinfo) | |
else: | |
self._loaded = True | |
return tarinfo | |
#-------------------------------------------------------------------------- | |
# Little helper methods: | |
def _getmember(self, name, tarinfo=None, normalize=False): | |
"""Find an archive member by name from bottom to top. | |
If tarinfo is given, it is used as the starting point. | |
""" | |
# Ensure that all members have been loaded. | |
members = self.getmembers() | |
# Limit the member search list up to tarinfo. | |
if tarinfo is not None: | |
members = members[:members.index(tarinfo)] | |
if normalize: | |
name = os.path.normpath(name) | |
for member in reversed(members): | |
if normalize: | |
member_name = os.path.normpath(member.name) | |
else: | |
member_name = member.name | |
if name == member_name: | |
return member | |
def _load(self): | |
"""Read through the entire archive file and look for readable | |
members. | |
""" | |
while True: | |
tarinfo = self.next() | |
if tarinfo is None: | |
break | |
self._loaded = True | |
def _check(self, mode=None): | |
"""Check if TarFile is still open, and if the operation's mode | |
corresponds to TarFile's mode. | |
""" | |
if self.closed: | |
raise IOError("%s is closed" % self.__class__.__name__) | |
if mode is not None and self.mode not in mode: | |
raise IOError("bad operation for mode %r" % self.mode) | |
def _find_link_target(self, tarinfo): | |
"""Find the target member of a symlink or hardlink member in the | |
archive. | |
""" | |
if tarinfo.issym(): | |
# Always search the entire archive. | |
linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname | |
limit = None | |
else: | |
# Search the archive before the link, because a hard link is | |
# just a reference to an already archived file. | |
linkname = tarinfo.linkname | |
limit = tarinfo | |
member = self._getmember(linkname, tarinfo=limit, normalize=True) | |
if member is None: | |
raise KeyError("linkname %r not found" % linkname) | |
return member | |
def __iter__(self): | |
"""Provide an iterator object. | |
""" | |
if self._loaded: | |
return iter(self.members) | |
else: | |
return TarIter(self) | |
def _dbg(self, level, msg): | |
"""Write debugging output to sys.stderr. | |
""" | |
if level <= self.debug: | |
print(msg, file=sys.stderr) | |
def __enter__(self): | |
self._check() | |
return self | |
def __exit__(self, type, value, traceback): | |
if type is None: | |
self.close() | |
else: | |
# An exception occurred. We must not call close() because | |
# it would try to write end-of-archive blocks and padding. | |
if not self._extfileobj: | |
self.fileobj.close() | |
self.closed = True | |
# class TarFile | |
class TarIter(object): | |
"""Iterator Class. | |
for tarinfo in TarFile(...): | |
suite... | |
""" | |
def __init__(self, tarfile): | |
"""Construct a TarIter object. | |
""" | |
self.tarfile = tarfile | |
self.index = 0 | |
def __iter__(self): | |
"""Return iterator object. | |
""" | |
return self | |
def __next__(self): | |
"""Return the next item using TarFile's next() method. | |
When all members have been read, set TarFile as _loaded. | |
""" | |
# Fix for SF #1100429: Under rare circumstances it can | |
# happen that getmembers() is called during iteration, | |
# which will cause TarIter to stop prematurely. | |
if not self.tarfile._loaded: | |
tarinfo = self.tarfile.next() | |
if not tarinfo: | |
self.tarfile._loaded = True | |
raise StopIteration | |
else: | |
try: | |
tarinfo = self.tarfile.members[self.index] | |
except IndexError: | |
raise StopIteration | |
self.index += 1 | |
return tarinfo | |
next = __next__ # for Python 2.x | |
#-------------------- | |
# exported functions | |
#-------------------- | |
def is_tarfile(name): | |
"""Return True if name points to a tar archive that we | |
are able to handle, else return False. | |
""" | |
try: | |
t = open(name) | |
t.close() | |
return True | |
except TarError: | |
return False | |
bltn_open = open | |
open = TarFile.open |
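Editor's note: the module above mirrors the standard library's tarfile API, so a minimal round-trip looks like the sketch below. The member name is hypothetical, and the snippet assumes the module is importable as `tarfile` (the stdlib module behaves identically).

import io
import tarfile

# Build a small archive in memory, then read it back.
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w:gz") as tf:
    payload = b"hello, tar"
    info = tarfile.TarInfo("greeting.txt")   # hypothetical member name
    info.size = len(payload)
    tf.addfile(info, io.BytesIO(payload))

buf.seek(0)
with tarfile.open(fileobj=buf, mode="r:*") as tf:   # transparent decompression
    member = tf.getmember("greeting.txt")
    data = tf.extractfile(member).read()
    assert data == b"hello, tar"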
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2013-2016 Vinay Sajip. | |
# Licensed to the Python Software Foundation under a contributor agreement. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
from __future__ import absolute_import | |
import os | |
import re | |
import sys | |
try: | |
import ssl | |
except ImportError: | |
ssl = None | |
if sys.version_info[0] < 3: # pragma: no cover | |
from StringIO import StringIO | |
string_types = basestring, | |
text_type = unicode | |
from types import FileType as file_type | |
import __builtin__ as builtins | |
import ConfigParser as configparser | |
from ._backport import shutil | |
from urlparse import urlparse, urlunparse, urljoin, urlsplit, urlunsplit | |
from urllib import (urlretrieve, quote as _quote, unquote, url2pathname, | |
pathname2url, ContentTooShortError, splittype) | |
def quote(s): | |
if isinstance(s, unicode): | |
s = s.encode('utf-8') | |
return _quote(s) | |
import urllib2 | |
from urllib2 import (Request, urlopen, URLError, HTTPError, | |
HTTPBasicAuthHandler, HTTPPasswordMgr, | |
HTTPHandler, HTTPRedirectHandler, | |
build_opener) | |
if ssl: | |
from urllib2 import HTTPSHandler | |
import httplib | |
import xmlrpclib | |
import Queue as queue | |
from HTMLParser import HTMLParser | |
import htmlentitydefs | |
raw_input = raw_input | |
from itertools import ifilter as filter | |
from itertools import ifilterfalse as filterfalse | |
_userprog = None | |
def splituser(host): | |
"""splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" | |
global _userprog | |
if _userprog is None: | |
import re | |
_userprog = re.compile('^(.*)@(.*)$') | |
match = _userprog.match(host) | |
if match: return match.group(1, 2) | |
return None, host | |
else: # pragma: no cover | |
from io import StringIO | |
string_types = str, | |
text_type = str | |
from io import TextIOWrapper as file_type | |
import builtins | |
import configparser | |
import shutil | |
from urllib.parse import (urlparse, urlunparse, urljoin, splituser, quote, | |
unquote, urlsplit, urlunsplit, splittype) | |
from urllib.request import (urlopen, urlretrieve, Request, url2pathname, | |
pathname2url, | |
HTTPBasicAuthHandler, HTTPPasswordMgr, | |
HTTPHandler, HTTPRedirectHandler, | |
build_opener) | |
if ssl: | |
from urllib.request import HTTPSHandler | |
from urllib.error import HTTPError, URLError, ContentTooShortError | |
import http.client as httplib | |
import urllib.request as urllib2 | |
import xmlrpc.client as xmlrpclib | |
import queue | |
from html.parser import HTMLParser | |
import html.entities as htmlentitydefs | |
raw_input = input | |
from itertools import filterfalse | |
filter = filter | |
try: | |
from ssl import match_hostname, CertificateError | |
except ImportError: # pragma: no cover | |
class CertificateError(ValueError): | |
pass | |
def _dnsname_match(dn, hostname, max_wildcards=1): | |
"""Matching according to RFC 6125, section 6.4.3 | |
http://tools.ietf.org/html/rfc6125#section-6.4.3 | |
""" | |
pats = [] | |
if not dn: | |
return False | |
parts = dn.split('.') | |
leftmost, remainder = parts[0], parts[1:] | |
wildcards = leftmost.count('*') | |
if wildcards > max_wildcards: | |
# Issue #17980: avoid denials of service by refusing more | |
# than one wildcard per fragment. A survey of established | |
# policy among SSL implementations showed it to be a | |
# reasonable choice. | |
raise CertificateError( | |
"too many wildcards in certificate DNS name: " + repr(dn)) | |
# speed up common case w/o wildcards | |
if not wildcards: | |
return dn.lower() == hostname.lower() | |
# RFC 6125, section 6.4.3, subitem 1. | |
# The client SHOULD NOT attempt to match a presented identifier in which | |
# the wildcard character comprises a label other than the left-most label. | |
if leftmost == '*': | |
# When '*' is a fragment by itself, it matches a non-empty dotless | |
# fragment. | |
pats.append('[^.]+') | |
elif leftmost.startswith('xn--') or hostname.startswith('xn--'): | |
# RFC 6125, section 6.4.3, subitem 3. | |
# The client SHOULD NOT attempt to match a presented identifier | |
# where the wildcard character is embedded within an A-label or | |
# U-label of an internationalized domain name. | |
pats.append(re.escape(leftmost)) | |
else: | |
# Otherwise, '*' matches any dotless string, e.g. www* | |
pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) | |
# add the remaining fragments, ignore any wildcards | |
for frag in remainder: | |
pats.append(re.escape(frag)) | |
pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) | |
return pat.match(hostname) | |
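# --- Editor's note: behaviour sketch, not part of the original module -------
# Examples of the RFC 6125 wildcard rules implemented above:
#
#     bool(_dnsname_match('*.example.com', 'www.example.com'))    # True
#     bool(_dnsname_match('*.example.com', 'a.b.example.com'))    # False: '*' covers one label only
#     bool(_dnsname_match('www.example.com', 'www.example.com'))  # True: exact match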
def match_hostname(cert, hostname): | |
"""Verify that *cert* (in decoded format as returned by | |
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 | |
rules are followed, but IP addresses are not accepted for *hostname*. | |
CertificateError is raised on failure. On success, the function | |
returns nothing. | |
""" | |
if not cert: | |
raise ValueError("empty or no certificate, match_hostname needs a " | |
"SSL socket or SSL context with either " | |
"CERT_OPTIONAL or CERT_REQUIRED") | |
dnsnames = [] | |
san = cert.get('subjectAltName', ()) | |
for key, value in san: | |
if key == 'DNS': | |
if _dnsname_match(value, hostname): | |
return | |
dnsnames.append(value) | |
if not dnsnames: | |
# The subject is only checked when there is no dNSName entry | |
# in subjectAltName | |
for sub in cert.get('subject', ()): | |
for key, value in sub: | |
# XXX according to RFC 2818, the most specific Common Name | |
# must be used. | |
if key == 'commonName': | |
if _dnsname_match(value, hostname): | |
return | |
dnsnames.append(value) | |
if len(dnsnames) > 1: | |
raise CertificateError("hostname %r " | |
"doesn't match either of %s" | |
% (hostname, ', '.join(map(repr, dnsnames)))) | |
elif len(dnsnames) == 1: | |
raise CertificateError("hostname %r " | |
"doesn't match %r" | |
% (hostname, dnsnames[0])) | |
else: | |
raise CertificateError("no appropriate commonName or " | |
"subjectAltName fields were found") | |
try: | |
from types import SimpleNamespace as Container | |
except ImportError: # pragma: no cover | |
class Container(object): | |
""" | |
A generic container for when multiple values need to be returned | |
""" | |
def __init__(self, **kwargs): | |
self.__dict__.update(kwargs) | |
try: | |
from shutil import which | |
except ImportError: # pragma: no cover | |
# Implementation from Python 3.3 | |
def which(cmd, mode=os.F_OK | os.X_OK, path=None): | |
"""Given a command, mode, and a PATH string, return the path which | |
conforms to the given mode on the PATH, or None if there is no such | |
file. | |
`mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result | |
of os.environ.get("PATH"), or can be overridden with a custom search | |
path. | |
""" | |
# Check that a given file can be accessed with the correct mode. | |
# Additionally check that `file` is not a directory, as on Windows | |
# directories pass the os.access check. | |
def _access_check(fn, mode): | |
return (os.path.exists(fn) and os.access(fn, mode) | |
and not os.path.isdir(fn)) | |
# If we're given a path with a directory part, look it up directly rather | |
# than referring to PATH directories. This includes checking relative to the | |
# current directory, e.g. ./script | |
if os.path.dirname(cmd): | |
if _access_check(cmd, mode): | |
return cmd | |
return None | |
if path is None: | |
path = os.environ.get("PATH", os.defpath) | |
if not path: | |
return None | |
path = path.split(os.pathsep) | |
if sys.platform == "win32": | |
# The current directory takes precedence on Windows. | |
if not os.curdir in path: | |
path.insert(0, os.curdir) | |
# PATHEXT is necessary to check on Windows. | |
pathext = os.environ.get("PATHEXT", "").split(os.pathsep) | |
# See if the given file matches any of the expected path extensions. | |
# This will allow us to short circuit when given "python.exe". | |
# If it does match, only test that one, otherwise we have to try | |
# others. | |
if any(cmd.lower().endswith(ext.lower()) for ext in pathext): | |
files = [cmd] | |
else: | |
files = [cmd + ext for ext in pathext] | |
else: | |
# On other platforms you don't have things like PATHEXT to tell you | |
# what file suffixes are executable, so just pass on cmd as-is. | |
files = [cmd] | |
seen = set() | |
for dir in path: | |
normdir = os.path.normcase(dir) | |
if not normdir in seen: | |
seen.add(normdir) | |
for thefile in files: | |
name = os.path.join(dir, thefile) | |
if _access_check(name, mode): | |
return name | |
return None | |
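# --- Editor's note: usage sketch, not part of the original module ----------
# The fallback behaves like shutil.which(); the paths below are hypothetical:
#
#     which("python")                           # e.g. "/usr/bin/python", or None
#     which("python", path="/opt/tools/bin")    # restrict the search path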
# ZipFile is a context manager in 2.7, but not in 2.6 | |
from zipfile import ZipFile as BaseZipFile | |
if hasattr(BaseZipFile, '__enter__'): # pragma: no cover | |
ZipFile = BaseZipFile | |
else: | |
from zipfile import ZipExtFile as BaseZipExtFile | |
class ZipExtFile(BaseZipExtFile): | |
def __init__(self, base): | |
self.__dict__.update(base.__dict__) | |
def __enter__(self): | |
return self | |
def __exit__(self, *exc_info): | |
self.close() | |
# return None, so if an exception occurred, it will propagate | |
class ZipFile(BaseZipFile): | |
def __enter__(self): | |
return self | |
def __exit__(self, *exc_info): | |
self.close() | |
# return None, so if an exception occurred, it will propagate | |
def open(self, *args, **kwargs): | |
base = BaseZipFile.open(self, *args, **kwargs) | |
return ZipExtFile(base) | |
try: | |
from platform import python_implementation | |
except ImportError: # pragma: no cover | |
def python_implementation(): | |
"""Return a string identifying the Python implementation.""" | |
if 'PyPy' in sys.version: | |
return 'PyPy' | |
if os.name == 'java': | |
return 'Jython' | |
if sys.version.startswith('IronPython'): | |
return 'IronPython' | |
return 'CPython' | |
try: | |
import sysconfig | |
except ImportError: # pragma: no cover | |
from ._backport import sysconfig | |
try: | |
callable = callable | |
except NameError: # pragma: no cover | |
from collections import Callable | |
def callable(obj): | |
return isinstance(obj, Callable) | |
try: | |
fsencode = os.fsencode | |
fsdecode = os.fsdecode | |
except AttributeError: # pragma: no cover | |
_fsencoding = sys.getfilesystemencoding() | |
if _fsencoding == 'mbcs': | |
_fserrors = 'strict' | |
else: | |
_fserrors = 'surrogateescape' | |
def fsencode(filename): | |
if isinstance(filename, bytes): | |
return filename | |
elif isinstance(filename, text_type): | |
return filename.encode(_fsencoding, _fserrors) | |
else: | |
raise TypeError("expect bytes or str, not %s" % | |
type(filename).__name__) | |
def fsdecode(filename): | |
if isinstance(filename, text_type): | |
return filename | |
elif isinstance(filename, bytes): | |
return filename.decode(_fsencoding, _fserrors) | |
else: | |
raise TypeError("expect bytes or str, not %s" % | |
type(filename).__name__) | |
try: | |
from tokenize import detect_encoding | |
except ImportError: # pragma: no cover | |
from codecs import BOM_UTF8, lookup | |
import re | |
cookie_re = re.compile("coding[:=]\s*([-\w.]+)") | |
def _get_normal_name(orig_enc): | |
"""Imitates get_normal_name in tokenizer.c.""" | |
# Only care about the first 12 characters. | |
enc = orig_enc[:12].lower().replace("_", "-") | |
if enc == "utf-8" or enc.startswith("utf-8-"): | |
return "utf-8" | |
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ | |
enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): | |
return "iso-8859-1" | |
return orig_enc | |
def detect_encoding(readline): | |
""" | |
The detect_encoding() function is used to detect the encoding that should | |
be used to decode a Python source file. It requires one argument, readline, | |
in the same way as the tokenize() generator. | |
It will call readline a maximum of twice, and return the encoding used | |
(as a string) and a list of any lines (left as bytes) it has read in. | |
It detects the encoding from the presence of a utf-8 bom or an encoding | |
cookie as specified in pep-0263. If both a bom and a cookie are present, | |
but disagree, a SyntaxError will be raised. If the encoding cookie is an | |
invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found, | |
'utf-8-sig' is returned. | |
If no encoding is specified, then the default of 'utf-8' will be returned. | |
""" | |
try: | |
filename = readline.__self__.name | |
except AttributeError: | |
filename = None | |
bom_found = False | |
encoding = None | |
default = 'utf-8' | |
def read_or_stop(): | |
try: | |
return readline() | |
except StopIteration: | |
return b'' | |
def find_cookie(line): | |
try: | |
# Decode as UTF-8. Either the line is an encoding declaration, | |
# in which case it should be pure ASCII, or it must be UTF-8 | |
# per default encoding. | |
line_string = line.decode('utf-8') | |
except UnicodeDecodeError: | |
msg = "invalid or missing encoding declaration" | |
if filename is not None: | |
msg = '{} for {!r}'.format(msg, filename) | |
raise SyntaxError(msg) | |
matches = cookie_re.findall(line_string) | |
if not matches: | |
return None | |
encoding = _get_normal_name(matches[0]) | |
try: | |
codec = lookup(encoding) | |
except LookupError: | |
# This behaviour mimics the Python interpreter | |
if filename is None: | |
msg = "unknown encoding: " + encoding | |
else: | |
msg = "unknown encoding for {!r}: {}".format(filename, | |
encoding) | |
raise SyntaxError(msg) | |
if bom_found: | |
if codec.name != 'utf-8': | |
# This behaviour mimics the Python interpreter | |
if filename is None: | |
msg = 'encoding problem: utf-8' | |
else: | |
msg = 'encoding problem for {!r}: utf-8'.format(filename) | |
raise SyntaxError(msg) | |
encoding += '-sig' | |
return encoding | |
first = read_or_stop() | |
if first.startswith(BOM_UTF8): | |
bom_found = True | |
first = first[3:] | |
default = 'utf-8-sig' | |
if not first: | |
return default, [] | |
encoding = find_cookie(first) | |
if encoding: | |
return encoding, [first] | |
second = read_or_stop() | |
if not second: | |
return default, [first] | |
encoding = find_cookie(second) | |
if encoding: | |
return encoding, [first, second] | |
return default, [first, second] | |
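# --- Editor's note: usage sketch, not part of the original module ----------
# detect_encoding() consumes a bytes-producing readline, e.g. from io.BytesIO:
#
#     import io
#     src = io.BytesIO(b"# -*- coding: latin-1 -*-\nx = 1\n")
#     detect_encoding(src.readline)
#     # -> ('iso-8859-1', [b'# -*- coding: latin-1 -*-\n'])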
# For converting & <-> & etc. | |
try: | |
from html import escape | |
except ImportError: | |
from cgi import escape | |
if sys.version_info[:2] < (3, 4): | |
unescape = HTMLParser().unescape | |
else: | |
from html import unescape | |
try: | |
from collections import ChainMap | |
except ImportError: # pragma: no cover | |
from collections import MutableMapping | |
try: | |
from reprlib import recursive_repr as _recursive_repr | |
except ImportError: | |
def _recursive_repr(fillvalue='...'): | |
''' | |
Decorator to make a repr function return fillvalue for a recursive | |
call | |
''' | |
def decorating_function(user_function): | |
repr_running = set() | |
def wrapper(self): | |
key = id(self), get_ident() | |
if key in repr_running: | |
return fillvalue | |
repr_running.add(key) | |
try: | |
result = user_function(self) | |
finally: | |
repr_running.discard(key) | |
return result | |
# Can't use functools.wraps() here because of bootstrap issues | |
wrapper.__module__ = getattr(user_function, '__module__') | |
wrapper.__doc__ = getattr(user_function, '__doc__') | |
wrapper.__name__ = getattr(user_function, '__name__') | |
wrapper.__annotations__ = getattr(user_function, '__annotations__', {}) | |
return wrapper | |
return decorating_function | |
class ChainMap(MutableMapping): | |
''' A ChainMap groups multiple dicts (or other mappings) together | |
to create a single, updateable view. | |
The underlying mappings are stored in a list. That list is public and can be
accessed or updated using the *maps* attribute. There is no other state. | |
Lookups search the underlying mappings successively until a key is found. | |
In contrast, writes, updates, and deletions only operate on the first | |
mapping. | |
''' | |
def __init__(self, *maps): | |
'''Initialize a ChainMap by setting *maps* to the given mappings. | |
If no mappings are provided, a single empty dictionary is used. | |
''' | |
self.maps = list(maps) or [{}] # always at least one map | |
def __missing__(self, key): | |
raise KeyError(key) | |
def __getitem__(self, key): | |
for mapping in self.maps: | |
try: | |
return mapping[key] # can't use 'key in mapping' with defaultdict | |
except KeyError: | |
pass | |
return self.__missing__(key) # support subclasses that define __missing__ | |
def get(self, key, default=None): | |
return self[key] if key in self else default | |
def __len__(self): | |
return len(set().union(*self.maps)) # reuses stored hash values if possible | |
def __iter__(self): | |
return iter(set().union(*self.maps)) | |
def __contains__(self, key): | |
return any(key in m for m in self.maps) | |
def __bool__(self): | |
return any(self.maps) | |
@_recursive_repr() | |
def __repr__(self): | |
return '{0.__class__.__name__}({1})'.format( | |
self, ', '.join(map(repr, self.maps))) | |
@classmethod | |
def fromkeys(cls, iterable, *args): | |
'Create a ChainMap with a single dict created from the iterable.' | |
return cls(dict.fromkeys(iterable, *args)) | |
def copy(self): | |
'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' | |
return self.__class__(self.maps[0].copy(), *self.maps[1:]) | |
__copy__ = copy | |
def new_child(self): # like Django's Context.push() | |
'New ChainMap with a new dict followed by all previous maps.' | |
return self.__class__({}, *self.maps) | |
@property | |
def parents(self): # like Django's Context.pop() | |
'New ChainMap from maps[1:].' | |
return self.__class__(*self.maps[1:]) | |
def __setitem__(self, key, value): | |
self.maps[0][key] = value | |
def __delitem__(self, key): | |
try: | |
del self.maps[0][key] | |
except KeyError: | |
raise KeyError('Key not found in the first mapping: {!r}'.format(key)) | |
def popitem(self): | |
'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.'
try: | |
return self.maps[0].popitem() | |
except KeyError: | |
raise KeyError('No keys found in the first mapping.') | |
def pop(self, key, *args): | |
'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].' | |
try: | |
return self.maps[0].pop(key, *args) | |
except KeyError: | |
raise KeyError('Key not found in the first mapping: {!r}'.format(key)) | |
def clear(self): | |
'Clear maps[0], leaving maps[1:] intact.' | |
self.maps[0].clear() | |
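# --- Editor's note: usage sketch, not part of the original module ----------
# Lookups fall through the maps in order; writes only touch maps[0]:
#
#     defaults = {'colour': 'red', 'user': 'guest'}
#     overrides = {'user': 'admin'}
#     cm = ChainMap(overrides, defaults)
#     cm['user']               # 'admin'  (found in maps[0])
#     cm['colour']             # 'red'    (falls through to maps[1])
#     cm['colour'] = 'blue'    # stored in overrides only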
try: | |
from imp import cache_from_source | |
except ImportError: # pragma: no cover | |
def cache_from_source(path, debug_override=None): | |
assert path.endswith('.py') | |
if debug_override is None: | |
debug_override = __debug__ | |
if debug_override: | |
suffix = 'c' | |
else: | |
suffix = 'o' | |
return path + suffix | |
try: | |
from collections import OrderedDict | |
except ImportError: # pragma: no cover | |
## {{{ http://code.activestate.com/recipes/576693/ (r9) | |
# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. | |
# Passes Python2.7's test suite and incorporates all the latest updates. | |
try: | |
from thread import get_ident as _get_ident | |
except ImportError: | |
from dummy_thread import get_ident as _get_ident | |
try: | |
from _abcoll import KeysView, ValuesView, ItemsView | |
except ImportError: | |
pass | |
class OrderedDict(dict): | |
'Dictionary that remembers insertion order' | |
# An inherited dict maps keys to values. | |
# The inherited dict provides __getitem__, __len__, __contains__, and get. | |
# The remaining methods are order-aware. | |
# Big-O running times for all methods are the same as for regular dictionaries. | |
# The internal self.__map dictionary maps keys to links in a doubly linked list. | |
# The circular doubly linked list starts and ends with a sentinel element. | |
# The sentinel element never gets deleted (this simplifies the algorithm). | |
# Each link is stored as a list of length three: [PREV, NEXT, KEY]. | |
def __init__(self, *args, **kwds): | |
'''Initialize an ordered dictionary. Signature is the same as for | |
regular dictionaries, but keyword arguments are not recommended | |
because their insertion order is arbitrary. | |
''' | |
if len(args) > 1: | |
raise TypeError('expected at most 1 arguments, got %d' % len(args)) | |
try: | |
self.__root | |
except AttributeError: | |
self.__root = root = [] # sentinel node | |
root[:] = [root, root, None] | |
self.__map = {} | |
self.__update(*args, **kwds) | |
def __setitem__(self, key, value, dict_setitem=dict.__setitem__): | |
'od.__setitem__(i, y) <==> od[i]=y' | |
# Setting a new item creates a new link which goes at the end of the linked | |
# list, and the inherited dictionary is updated with the new key/value pair. | |
if key not in self: | |
root = self.__root | |
last = root[0] | |
last[1] = root[0] = self.__map[key] = [last, root, key] | |
dict_setitem(self, key, value) | |
def __delitem__(self, key, dict_delitem=dict.__delitem__): | |
'od.__delitem__(y) <==> del od[y]' | |
# Deleting an existing item uses self.__map to find the link which is | |
# then removed by updating the links in the predecessor and successor nodes. | |
dict_delitem(self, key) | |
link_prev, link_next, key = self.__map.pop(key) | |
link_prev[1] = link_next | |
link_next[0] = link_prev | |
def __iter__(self): | |
'od.__iter__() <==> iter(od)' | |
root = self.__root | |
curr = root[1] | |
while curr is not root: | |
yield curr[2] | |
curr = curr[1] | |
def __reversed__(self): | |
'od.__reversed__() <==> reversed(od)' | |
root = self.__root | |
curr = root[0] | |
while curr is not root: | |
yield curr[2] | |
curr = curr[0] | |
def clear(self): | |
'od.clear() -> None. Remove all items from od.' | |
try: | |
for node in self.__map.itervalues(): | |
del node[:] | |
root = self.__root | |
root[:] = [root, root, None] | |
self.__map.clear() | |
except AttributeError: | |
pass | |
dict.clear(self) | |
def popitem(self, last=True): | |
'''od.popitem() -> (k, v), return and remove a (key, value) pair. | |
Pairs are returned in LIFO order if last is true or FIFO order if false. | |
''' | |
if not self: | |
raise KeyError('dictionary is empty') | |
root = self.__root | |
if last: | |
link = root[0] | |
link_prev = link[0] | |
link_prev[1] = root | |
root[0] = link_prev | |
else: | |
link = root[1] | |
link_next = link[1] | |
root[1] = link_next | |
link_next[0] = root | |
key = link[2] | |
del self.__map[key] | |
value = dict.pop(self, key) | |
return key, value | |
# -- the following methods do not depend on the internal structure -- | |
def keys(self): | |
'od.keys() -> list of keys in od' | |
return list(self) | |
def values(self): | |
'od.values() -> list of values in od' | |
return [self[key] for key in self] | |
def items(self): | |
'od.items() -> list of (key, value) pairs in od' | |
return [(key, self[key]) for key in self] | |
def iterkeys(self): | |
'od.iterkeys() -> an iterator over the keys in od' | |
return iter(self) | |
def itervalues(self): | |
'od.itervalues() -> an iterator over the values in od'
for k in self: | |
yield self[k] | |
def iteritems(self): | |
'od.iteritems() -> an iterator over the (key, value) items in od'
for k in self: | |
yield (k, self[k]) | |
def update(*args, **kwds): | |
'''od.update(E, **F) -> None. Update od from dict/iterable E and F. | |
If E is a dict instance, does: for k in E: od[k] = E[k] | |
If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] | |
Or if E is an iterable of items, does: for k, v in E: od[k] = v | |
In either case, this is followed by: for k, v in F.items(): od[k] = v | |
''' | |
if len(args) > 2: | |
raise TypeError('update() takes at most 2 positional ' | |
'arguments (%d given)' % (len(args),)) | |
elif not args: | |
raise TypeError('update() takes at least 1 argument (0 given)') | |
self = args[0] | |
# Make progressively weaker assumptions about "other" | |
other = () | |
if len(args) == 2: | |
other = args[1] | |
if isinstance(other, dict): | |
for key in other: | |
self[key] = other[key] | |
elif hasattr(other, 'keys'): | |
for key in other.keys(): | |
self[key] = other[key] | |
else: | |
for key, value in other: | |
self[key] = value | |
for key, value in kwds.items(): | |
self[key] = value | |
__update = update # let subclasses override update without breaking __init__ | |
__marker = object() | |
def pop(self, key, default=__marker): | |
'''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. | |
If key is not found, d is returned if given, otherwise KeyError is raised. | |
''' | |
if key in self: | |
result = self[key] | |
del self[key] | |
return result | |
if default is self.__marker: | |
raise KeyError(key) | |
return default | |
def setdefault(self, key, default=None): | |
'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' | |
if key in self: | |
return self[key] | |
self[key] = default | |
return default | |
def __repr__(self, _repr_running=None): | |
'od.__repr__() <==> repr(od)' | |
if not _repr_running: _repr_running = {} | |
call_key = id(self), _get_ident() | |
if call_key in _repr_running: | |
return '...' | |
_repr_running[call_key] = 1 | |
try: | |
if not self: | |
return '%s()' % (self.__class__.__name__,) | |
return '%s(%r)' % (self.__class__.__name__, self.items()) | |
finally: | |
del _repr_running[call_key] | |
def __reduce__(self): | |
'Return state information for pickling' | |
items = [[k, self[k]] for k in self] | |
inst_dict = vars(self).copy() | |
for k in vars(OrderedDict()): | |
inst_dict.pop(k, None) | |
if inst_dict: | |
return (self.__class__, (items,), inst_dict) | |
return self.__class__, (items,) | |
def copy(self): | |
'od.copy() -> a shallow copy of od' | |
return self.__class__(self) | |
@classmethod | |
def fromkeys(cls, iterable, value=None): | |
'''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S | |
and values equal to v (which defaults to None). | |
''' | |
d = cls() | |
for key in iterable: | |
d[key] = value | |
return d | |
def __eq__(self, other): | |
'''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive | |
while comparison to a regular mapping is order-insensitive. | |
''' | |
if isinstance(other, OrderedDict): | |
return len(self)==len(other) and self.items() == other.items() | |
return dict.__eq__(self, other) | |
def __ne__(self, other): | |
return not self == other | |
# -- the following methods are only used in Python 2.7 -- | |
def viewkeys(self): | |
"od.viewkeys() -> a set-like object providing a view on od's keys" | |
return KeysView(self) | |
def viewvalues(self): | |
"od.viewvalues() -> an object providing a view on od's values" | |
return ValuesView(self) | |
def viewitems(self): | |
"od.viewitems() -> a set-like object providing a view on od's items" | |
return ItemsView(self) | |
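# --- Editor's note: illustrative sketch, not part of the original module. ---
# An assumed usage example of the OrderedDict backport defined above: keys keep
# insertion order, items() returns an ordered list, and popitem(last=False)
# removes entries FIFO. Wrapped in a function so nothing runs at import time.
def _ordereddict_usage_sketch():
    od = OrderedDict()
    od['first'] = 1
    od['second'] = 2
    od['third'] = 3
    assert list(od.keys()) == ['first', 'second', 'third']  # insertion order preserved
    od['second'] = 20                                        # reassignment keeps the slot
    assert od.items()[1] == ('second', 20)
    key, value = od.popitem(last=False)                      # FIFO removal
    assert (key, value) == ('first', 1)
    return od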
try: | |
from logging.config import BaseConfigurator, valid_ident | |
except ImportError: # pragma: no cover | |
IDENTIFIER = re.compile('^[a-z_][a-z0-9_]*$', re.I) | |
def valid_ident(s): | |
m = IDENTIFIER.match(s) | |
if not m: | |
raise ValueError('Not a valid Python identifier: %r' % s) | |
return True | |
# The ConvertingXXX classes are wrappers around standard Python containers, | |
# and they serve to convert any suitable values in the container. The | |
# conversion converts base dicts, lists and tuples to their wrapped | |
# equivalents, whereas strings which match a conversion format are converted | |
# appropriately. | |
# | |
# Each wrapper should have a configurator attribute holding the actual | |
# configurator to use for conversion. | |
class ConvertingDict(dict): | |
"""A converting dictionary wrapper.""" | |
def __getitem__(self, key): | |
value = dict.__getitem__(self, key) | |
result = self.configurator.convert(value) | |
#If the converted value is different, save for next time | |
if value is not result: | |
self[key] = result | |
if type(result) in (ConvertingDict, ConvertingList, | |
ConvertingTuple): | |
result.parent = self | |
result.key = key | |
return result | |
def get(self, key, default=None): | |
value = dict.get(self, key, default) | |
result = self.configurator.convert(value) | |
#If the converted value is different, save for next time | |
if value is not result: | |
self[key] = result | |
if type(result) in (ConvertingDict, ConvertingList, | |
ConvertingTuple): | |
result.parent = self | |
result.key = key | |
return result | |
def pop(self, key, default=None): | |
value = dict.pop(self, key, default) | |
result = self.configurator.convert(value) | |
if value is not result: | |
if type(result) in (ConvertingDict, ConvertingList, | |
ConvertingTuple): | |
result.parent = self | |
result.key = key | |
return result | |
class ConvertingList(list): | |
"""A converting list wrapper.""" | |
def __getitem__(self, key): | |
value = list.__getitem__(self, key) | |
result = self.configurator.convert(value) | |
#If the converted value is different, save for next time | |
if value is not result: | |
self[key] = result | |
if type(result) in (ConvertingDict, ConvertingList, | |
ConvertingTuple): | |
result.parent = self | |
result.key = key | |
return result | |
def pop(self, idx=-1): | |
value = list.pop(self, idx) | |
result = self.configurator.convert(value) | |
if value is not result: | |
if type(result) in (ConvertingDict, ConvertingList, | |
ConvertingTuple): | |
result.parent = self | |
return result | |
class ConvertingTuple(tuple): | |
"""A converting tuple wrapper.""" | |
def __getitem__(self, key): | |
value = tuple.__getitem__(self, key) | |
result = self.configurator.convert(value) | |
if value is not result: | |
if type(result) in (ConvertingDict, ConvertingList, | |
ConvertingTuple): | |
result.parent = self | |
result.key = key | |
return result | |
class BaseConfigurator(object): | |
""" | |
The configurator base class which defines some useful defaults. | |
""" | |
CONVERT_PATTERN = re.compile(r'^(?P<prefix>[a-z]+)://(?P<suffix>.*)$') | |
WORD_PATTERN = re.compile(r'^\s*(\w+)\s*') | |
DOT_PATTERN = re.compile(r'^\.\s*(\w+)\s*') | |
INDEX_PATTERN = re.compile(r'^\[\s*(\w+)\s*\]\s*') | |
DIGIT_PATTERN = re.compile(r'^\d+$') | |
value_converters = { | |
'ext' : 'ext_convert', | |
'cfg' : 'cfg_convert', | |
} | |
# We might want to use a different one, e.g. importlib | |
importer = staticmethod(__import__) | |
def __init__(self, config): | |
self.config = ConvertingDict(config) | |
self.config.configurator = self | |
def resolve(self, s): | |
""" | |
Resolve strings to objects using standard import and attribute | |
syntax. | |
""" | |
name = s.split('.') | |
used = name.pop(0) | |
try: | |
found = self.importer(used) | |
for frag in name: | |
used += '.' + frag | |
try: | |
found = getattr(found, frag) | |
except AttributeError: | |
self.importer(used) | |
found = getattr(found, frag) | |
return found | |
except ImportError: | |
e, tb = sys.exc_info()[1:] | |
v = ValueError('Cannot resolve %r: %s' % (s, e)) | |
v.__cause__, v.__traceback__ = e, tb | |
raise v | |
def ext_convert(self, value): | |
"""Default converter for the ext:// protocol.""" | |
return self.resolve(value) | |
def cfg_convert(self, value): | |
"""Default converter for the cfg:// protocol.""" | |
rest = value | |
m = self.WORD_PATTERN.match(rest) | |
if m is None: | |
raise ValueError("Unable to convert %r" % value) | |
else: | |
rest = rest[m.end():] | |
d = self.config[m.groups()[0]] | |
#print d, rest | |
while rest: | |
m = self.DOT_PATTERN.match(rest) | |
if m: | |
d = d[m.groups()[0]] | |
else: | |
m = self.INDEX_PATTERN.match(rest) | |
if m: | |
idx = m.groups()[0] | |
if not self.DIGIT_PATTERN.match(idx): | |
d = d[idx] | |
else: | |
try: | |
n = int(idx) # try as number first (most likely) | |
d = d[n] | |
except TypeError: | |
d = d[idx] | |
if m: | |
rest = rest[m.end():] | |
else: | |
raise ValueError('Unable to convert ' | |
'%r at %r' % (value, rest)) | |
#rest should be empty | |
return d | |
def convert(self, value): | |
""" | |
Convert values to an appropriate type. dicts, lists and tuples are | |
replaced by their converting alternatives. Strings are checked to | |
see if they have a conversion format and are converted if they do. | |
""" | |
if not isinstance(value, ConvertingDict) and isinstance(value, dict): | |
value = ConvertingDict(value) | |
value.configurator = self | |
elif not isinstance(value, ConvertingList) and isinstance(value, list): | |
value = ConvertingList(value) | |
value.configurator = self | |
elif not isinstance(value, ConvertingTuple) and\ | |
isinstance(value, tuple): | |
value = ConvertingTuple(value) | |
value.configurator = self | |
elif isinstance(value, string_types): | |
m = self.CONVERT_PATTERN.match(value) | |
if m: | |
d = m.groupdict() | |
prefix = d['prefix'] | |
converter = self.value_converters.get(prefix, None) | |
if converter: | |
suffix = d['suffix'] | |
converter = getattr(self, converter) | |
value = converter(suffix) | |
return value | |
def configure_custom(self, config): | |
"""Configure an object with a user-supplied factory.""" | |
c = config.pop('()') | |
if not callable(c): | |
c = self.resolve(c) | |
props = config.pop('.', None) | |
# Check for valid identifiers | |
kwargs = dict([(k, config[k]) for k in config if valid_ident(k)]) | |
result = c(**kwargs) | |
if props: | |
for name, value in props.items(): | |
setattr(result, name, value) | |
return result | |
def as_tuple(self, value): | |
"""Utility function which converts lists to tuples.""" | |
if isinstance(value, list): | |
value = tuple(value) | |
return value |
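# --- Editor's note: illustrative sketch, not part of the original module. ---
# An assumed example of how BaseConfigurator and the ConvertingDict wrapper
# cooperate: values are converted lazily on access, 'ext://...' strings are
# resolved to importable objects, and 'cfg://...' strings are looked up within
# the same configuration. The keys used here are invented for illustration.
# Wrapped in a function so nothing runs at import time.
def _configurator_usage_sketch():
    cfg = BaseConfigurator({
        'level': 'DEBUG',
        'handler': {
            'class': 'ext://logging.StreamHandler',  # resolved via import/getattr
            'level': 'cfg://level',                   # cross-reference into this config
        },
    })
    handler_cfg = cfg.config['handler']   # the inner dict is wrapped as a ConvertingDict
    handler_cls = handler_cfg['class']    # -> the logging.StreamHandler class
    handler_lvl = handler_cfg['level']    # -> 'DEBUG'
    return handler_cls, handler_lvl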
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2012-2016 The Python Software Foundation. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
"""PEP 376 implementation.""" | |
from __future__ import unicode_literals | |
import base64 | |
import codecs | |
import contextlib | |
import hashlib | |
import logging | |
import os | |
import posixpath | |
import sys | |
import zipimport | |
from . import DistlibException, resources | |
from .compat import StringIO | |
from .version import get_scheme, UnsupportedVersionError | |
from .metadata import Metadata, METADATA_FILENAME, WHEEL_METADATA_FILENAME | |
from .util import (parse_requirement, cached_property, parse_name_and_version, | |
read_exports, write_exports, CSVReader, CSVWriter) | |
__all__ = ['Distribution', 'BaseInstalledDistribution', | |
'InstalledDistribution', 'EggInfoDistribution', | |
'DistributionPath'] | |
logger = logging.getLogger(__name__) | |
EXPORTS_FILENAME = 'pydist-exports.json' | |
COMMANDS_FILENAME = 'pydist-commands.json' | |
DIST_FILES = ('INSTALLER', METADATA_FILENAME, 'RECORD', 'REQUESTED', | |
'RESOURCES', EXPORTS_FILENAME, 'SHARED') | |
DISTINFO_EXT = '.dist-info' | |
class _Cache(object): | |
""" | |
A simple cache mapping names and .dist-info paths to distributions | |
""" | |
def __init__(self): | |
""" | |
Initialise an instance. There is normally one for each DistributionPath. | |
""" | |
self.name = {} | |
self.path = {} | |
self.generated = False | |
def clear(self): | |
""" | |
Clear the cache, setting it to its initial state. | |
""" | |
self.name.clear() | |
self.path.clear() | |
self.generated = False | |
def add(self, dist): | |
""" | |
Add a distribution to the cache. | |
:param dist: The distribution to add. | |
""" | |
if dist.path not in self.path: | |
self.path[dist.path] = dist | |
self.name.setdefault(dist.key, []).append(dist) | |
class DistributionPath(object): | |
""" | |
Represents a set of distributions installed on a path (typically sys.path). | |
""" | |
def __init__(self, path=None, include_egg=False): | |
""" | |
Create an instance from a path, optionally including legacy (distutils/ | |
setuptools/distribute) distributions. | |
:param path: The path to use, as a list of directories. If not specified, | |
sys.path is used. | |
:param include_egg: If True, this instance will look for and return legacy | |
distributions as well as those based on PEP 376. | |
""" | |
if path is None: | |
path = sys.path | |
self.path = path | |
self._include_dist = True | |
self._include_egg = include_egg | |
self._cache = _Cache() | |
self._cache_egg = _Cache() | |
self._cache_enabled = True | |
self._scheme = get_scheme('default') | |
def _get_cache_enabled(self): | |
return self._cache_enabled | |
def _set_cache_enabled(self, value): | |
self._cache_enabled = value | |
cache_enabled = property(_get_cache_enabled, _set_cache_enabled) | |
def clear_cache(self): | |
""" | |
Clears the internal cache. | |
""" | |
self._cache.clear() | |
self._cache_egg.clear() | |
def _yield_distributions(self): | |
""" | |
Yield .dist-info and/or .egg(-info) distributions. | |
""" | |
# We need to check if we've seen some resources already, because on | |
# some Linux systems (e.g. some Debian/Ubuntu variants) there are | |
# symlinks which alias other files in the environment. | |
seen = set() | |
for path in self.path: | |
finder = resources.finder_for_path(path) | |
if finder is None: | |
continue | |
r = finder.find('') | |
if not r or not r.is_container: | |
continue | |
rset = sorted(r.resources) | |
for entry in rset: | |
r = finder.find(entry) | |
if not r or r.path in seen: | |
continue | |
if self._include_dist and entry.endswith(DISTINFO_EXT): | |
possible_filenames = [METADATA_FILENAME, WHEEL_METADATA_FILENAME] | |
for metadata_filename in possible_filenames: | |
metadata_path = posixpath.join(entry, metadata_filename) | |
pydist = finder.find(metadata_path) | |
if pydist: | |
break | |
else: | |
continue | |
with contextlib.closing(pydist.as_stream()) as stream: | |
metadata = Metadata(fileobj=stream, scheme='legacy') | |
logger.debug('Found %s', r.path) | |
seen.add(r.path) | |
yield new_dist_class(r.path, metadata=metadata, | |
env=self) | |
elif self._include_egg and entry.endswith(('.egg-info', | |
'.egg')): | |
logger.debug('Found %s', r.path) | |
seen.add(r.path) | |
yield old_dist_class(r.path, self) | |
def _generate_cache(self): | |
""" | |
Scan the path for distributions and populate the cache with | |
those that are found. | |
""" | |
gen_dist = not self._cache.generated | |
gen_egg = self._include_egg and not self._cache_egg.generated | |
if gen_dist or gen_egg: | |
for dist in self._yield_distributions(): | |
if isinstance(dist, InstalledDistribution): | |
self._cache.add(dist) | |
else: | |
self._cache_egg.add(dist) | |
if gen_dist: | |
self._cache.generated = True | |
if gen_egg: | |
self._cache_egg.generated = True | |
@classmethod | |
def distinfo_dirname(cls, name, version): | |
""" | |
The *name* and *version* parameters are converted into their | |
filename-escaped form, i.e. any ``'-'`` characters are replaced | |
with ``'_'`` other than the one in ``'dist-info'`` and the one | |
separating the name from the version number. | |
:parameter name: is converted to a standard distribution name by replacing | |
any runs of non-alphanumeric characters with a single | |
``'-'``. | |
:type name: string | |
:parameter version: is converted to a standard version string. Spaces | |
become dots, and all other non-alphanumeric characters | |
(except dots) become dashes, with runs of multiple | |
dashes condensed to a single dash. | |
:type version: string | |
:returns: directory name | |
:rtype: string""" | |
name = name.replace('-', '_') | |
return '-'.join([name, version]) + DISTINFO_EXT | |
def get_distributions(self): | |
""" | |
Provides an iterator that looks for distributions and returns | |
:class:`InstalledDistribution` or | |
:class:`EggInfoDistribution` instances for each one of them. | |
:rtype: iterator of :class:`InstalledDistribution` and | |
:class:`EggInfoDistribution` instances | |
""" | |
if not self._cache_enabled: | |
for dist in self._yield_distributions(): | |
yield dist | |
else: | |
self._generate_cache() | |
for dist in self._cache.path.values(): | |
yield dist | |
if self._include_egg: | |
for dist in self._cache_egg.path.values(): | |
yield dist | |
def get_distribution(self, name): | |
""" | |
Looks for a named distribution on the path. | |
This function only returns the first result found, as no more than one | |
value is expected. If nothing is found, ``None`` is returned. | |
:rtype: :class:`InstalledDistribution`, :class:`EggInfoDistribution` | |
or ``None`` | |
""" | |
result = None | |
name = name.lower() | |
if not self._cache_enabled: | |
for dist in self._yield_distributions(): | |
if dist.key == name: | |
result = dist | |
break | |
else: | |
self._generate_cache() | |
if name in self._cache.name: | |
result = self._cache.name[name][0] | |
elif self._include_egg and name in self._cache_egg.name: | |
result = self._cache_egg.name[name][0] | |
return result | |
def provides_distribution(self, name, version=None): | |
""" | |
Iterates over all distributions to find which distributions provide *name*. | |
If a *version* is provided, it will be used to filter the results. | |
Each distribution that provides a matching *name* (and *version*, if | |
given) is yielded in turn; if nothing matches, nothing is yielded. | |
:parameter version: a version specifier that indicates the version | |
required, conforming to the format in ``PEP-345`` | |
:type name: string | |
:type version: string | |
""" | |
matcher = None | |
if version is not None: | |
try: | |
matcher = self._scheme.matcher('%s (%s)' % (name, version)) | |
except ValueError: | |
raise DistlibException('invalid name or version: %r, %r' % | |
(name, version)) | |
for dist in self.get_distributions(): | |
provided = dist.provides | |
for p in provided: | |
p_name, p_ver = parse_name_and_version(p) | |
if matcher is None: | |
if p_name == name: | |
yield dist | |
break | |
else: | |
if p_name == name and matcher.match(p_ver): | |
yield dist | |
break | |
def get_file_path(self, name, relative_path): | |
""" | |
Return the path to a resource file. | |
""" | |
dist = self.get_distribution(name) | |
if dist is None: | |
raise LookupError('no distribution named %r found' % name) | |
return dist.get_resource_path(relative_path) | |
def get_exported_entries(self, category, name=None): | |
""" | |
Return all of the exported entries in a particular category. | |
:param category: The category to search for entries. | |
:param name: If specified, only entries with that name are returned. | |
""" | |
for dist in self.get_distributions(): | |
r = dist.exports | |
if category in r: | |
d = r[category] | |
if name is not None: | |
if name in d: | |
yield d[name] | |
else: | |
for v in d.values(): | |
yield v | |
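# --- Editor's note: illustrative sketch, not part of the original module. ---
# An assumed example of DistributionPath: scan sys.path for PEP 376 metadata
# (and legacy .egg/.egg-info metadata when include_egg=True), then query a
# single distribution by name. 'requests' is only a hypothetical example name.
# Wrapped in a function so nothing runs at import time.
def _distribution_path_sketch():
    dist_path = DistributionPath(include_egg=True)
    installed = sorted(d.name for d in dist_path.get_distributions())
    dist = dist_path.get_distribution('requests')   # returns None if not installed
    if dist is not None:
        return dist.name_and_version, installed
    return None, installed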
class Distribution(object): | |
""" | |
A base class for distributions, whether installed or from indexes. | |
Either way, it must have some metadata, so that's all that's needed | |
for construction. | |
""" | |
build_time_dependency = False | |
""" | |
Set to True if it's known to be only a build-time dependency (i.e. | |
not needed after installation). | |
""" | |
requested = False | |
"""A boolean that indicates whether the ``REQUESTED`` metadata file is | |
present (in other words, whether the package was installed by user | |
request or it was installed as a dependency).""" | |
def __init__(self, metadata): | |
""" | |
Initialise an instance. | |
:param metadata: The instance of :class:`Metadata` describing this | |
distribution. | |
""" | |
self.metadata = metadata | |
self.name = metadata.name | |
self.key = self.name.lower() # for case-insensitive comparisons | |
self.version = metadata.version | |
self.locator = None | |
self.digest = None | |
self.extras = None # additional features requested | |
self.context = None # environment marker overrides | |
self.download_urls = set() | |
self.digests = {} | |
@property | |
def source_url(self): | |
""" | |
The source archive download URL for this distribution. | |
""" | |
return self.metadata.source_url | |
download_url = source_url # Backward compatibility | |
@property | |
def name_and_version(self): | |
""" | |
A utility property which displays the name and version in parentheses. | |
""" | |
return '%s (%s)' % (self.name, self.version) | |
@property | |
def provides(self): | |
""" | |
A set of distribution names and versions provided by this distribution. | |
:return: A set of "name (version)" strings. | |
""" | |
plist = self.metadata.provides | |
s = '%s (%s)' % (self.name, self.version) | |
if s not in plist: | |
plist.append(s) | |
return plist | |
def _get_requirements(self, req_attr): | |
md = self.metadata | |
logger.debug('Getting requirements from metadata %r', md.todict()) | |
reqts = getattr(md, req_attr) | |
return set(md.get_requirements(reqts, extras=self.extras, | |
env=self.context)) | |
@property | |
def run_requires(self): | |
return self._get_requirements('run_requires') | |
@property | |
def meta_requires(self): | |
return self._get_requirements('meta_requires') | |
@property | |
def build_requires(self): | |
return self._get_requirements('build_requires') | |
@property | |
def test_requires(self): | |
return self._get_requirements('test_requires') | |
@property | |
def dev_requires(self): | |
return self._get_requirements('dev_requires') | |
def matches_requirement(self, req): | |
""" | |
Say if this instance matches (fulfills) a requirement. | |
:param req: The requirement to match. | |
:type req: str | |
:return: True if it matches, else False. | |
""" | |
# Requirement may contain extras - parse to lose those | |
# from what's passed to the matcher | |
r = parse_requirement(req) | |
scheme = get_scheme(self.metadata.scheme) | |
try: | |
matcher = scheme.matcher(r.requirement) | |
except UnsupportedVersionError: | |
# XXX compat-mode if cannot read the version | |
logger.warning('could not read version %r - using name only', | |
req) | |
name = req.split()[0] | |
matcher = scheme.matcher(name) | |
name = matcher.key # case-insensitive | |
result = False | |
for p in self.provides: | |
p_name, p_ver = parse_name_and_version(p) | |
if p_name != name: | |
continue | |
try: | |
result = matcher.match(p_ver) | |
break | |
except UnsupportedVersionError: | |
pass | |
return result | |
def __repr__(self): | |
""" | |
Return a textual representation of this instance. | |
""" | |
if self.source_url: | |
suffix = ' [%s]' % self.source_url | |
else: | |
suffix = '' | |
return '<Distribution %s (%s)%s>' % (self.name, self.version, suffix) | |
def __eq__(self, other): | |
""" | |
See if this distribution is the same as another. | |
:param other: The distribution to compare with. To be equal to one | |
another, distributions must have the same type, name, | |
version and source_url. | |
:return: True if it is the same, else False. | |
""" | |
if type(other) is not type(self): | |
result = False | |
else: | |
result = (self.name == other.name and | |
self.version == other.version and | |
self.source_url == other.source_url) | |
return result | |
def __hash__(self): | |
""" | |
Compute hash in a way which matches the equality test. | |
""" | |
return hash(self.name) + hash(self.version) + hash(self.source_url) | |
class BaseInstalledDistribution(Distribution): | |
""" | |
This is the base class for installed distributions (whether PEP 376 or | |
legacy). | |
""" | |
hasher = None | |
def __init__(self, metadata, path, env=None): | |
""" | |
Initialise an instance. | |
:param metadata: An instance of :class:`Metadata` which describes the | |
distribution. This will normally have been initialised | |
from a metadata file in the ``path``. | |
:param path: The path of the ``.dist-info`` or ``.egg-info`` | |
directory for the distribution. | |
:param env: This is normally the :class:`DistributionPath` | |
instance where this distribution was found. | |
""" | |
super(BaseInstalledDistribution, self).__init__(metadata) | |
self.path = path | |
self.dist_path = env | |
def get_hash(self, data, hasher=None): | |
""" | |
Get the hash of some data, using a particular hash algorithm, if | |
specified. | |
:param data: The data to be hashed. | |
:type data: bytes | |
:param hasher: The name of a hash implementation, supported by hashlib, | |
or ``None``. Examples of valid values are ``'sha1'``, | |
``'sha224'``, ``'sha384'``, ``'sha256'``, ``'md5'`` and | |
``'sha512'``. If no hasher is specified, the ``hasher`` | |
attribute of the :class:`InstalledDistribution` instance | |
is used. If the hasher is determined to be ``None``, MD5 | |
is used as the hashing algorithm. | |
:returns: The hash of the data. If a hasher was explicitly specified, | |
the returned hash will be prefixed with the specified hasher | |
followed by '='. | |
:rtype: str | |
""" | |
if hasher is None: | |
hasher = self.hasher | |
if hasher is None: | |
hasher = hashlib.md5 | |
prefix = '' | |
else: | |
hasher = getattr(hashlib, hasher) | |
prefix = '%s=' % self.hasher | |
digest = hasher(data).digest() | |
digest = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii') | |
return '%s%s' % (prefix, digest) | |
class InstalledDistribution(BaseInstalledDistribution): | |
""" | |
Created with the *path* of the ``.dist-info`` directory provided to the | |
constructor. It reads the metadata contained in ``pydist.json`` when it is | |
instantiated, or uses a passed-in Metadata instance (useful for when | |
dry-run mode is being used). | |
""" | |
hasher = 'sha256' | |
def __init__(self, path, metadata=None, env=None): | |
self.finder = finder = resources.finder_for_path(path) | |
if finder is None: | |
raise ValueError('finder unavailable for %s' % path) | |
if env and env._cache_enabled and path in env._cache.path: | |
metadata = env._cache.path[path].metadata | |
elif metadata is None: | |
r = finder.find(METADATA_FILENAME) | |
# Temporary - for Wheel 0.23 support | |
if r is None: | |
r = finder.find(WHEEL_METADATA_FILENAME) | |
# Temporary - for legacy support | |
if r is None: | |
r = finder.find('METADATA') | |
if r is None: | |
raise ValueError('no %s found in %s' % (METADATA_FILENAME, | |
path)) | |
with contextlib.closing(r.as_stream()) as stream: | |
metadata = Metadata(fileobj=stream, scheme='legacy') | |
super(InstalledDistribution, self).__init__(metadata, path, env) | |
if env and env._cache_enabled: | |
env._cache.add(self) | |
r = finder.find('REQUESTED') | |
self.requested = r is not None | |
def __repr__(self): | |
return '<InstalledDistribution %r %s at %r>' % ( | |
self.name, self.version, self.path) | |
def __str__(self): | |
return "%s %s" % (self.name, self.version) | |
def _get_records(self): | |
""" | |
Get the list of installed files for the distribution | |
:return: A list of tuples of path, hash and size. Note that hash and | |
size might be ``None`` for some entries. The path is exactly | |
as stored in the file (which is as in PEP 376). | |
""" | |
results = [] | |
r = self.get_distinfo_resource('RECORD') | |
with contextlib.closing(r.as_stream()) as stream: | |
with CSVReader(stream=stream) as record_reader: | |
# Base location is parent dir of .dist-info dir | |
#base_location = os.path.dirname(self.path) | |
#base_location = os.path.abspath(base_location) | |
for row in record_reader: | |
missing = [None for i in range(len(row), 3)] | |
path, checksum, size = row + missing | |
#if not os.path.isabs(path): | |
# path = path.replace('/', os.sep) | |
# path = os.path.join(base_location, path) | |
results.append((path, checksum, size)) | |
return results | |
@cached_property | |
def exports(self): | |
""" | |
Return the information exported by this distribution. | |
:return: A dictionary of exports, mapping an export category to a dict | |
of :class:`ExportEntry` instances describing the individual | |
export entries, and keyed by name. | |
""" | |
result = {} | |
r = self.get_distinfo_resource(EXPORTS_FILENAME) | |
if r: | |
result = self.read_exports() | |
return result | |
def read_exports(self): | |
""" | |
Read exports data from a file in .ini format. | |
:return: A dictionary of exports, mapping an export category to a list | |
of :class:`ExportEntry` instances describing the individual | |
export entries. | |
""" | |
result = {} | |
r = self.get_distinfo_resource(EXPORTS_FILENAME) | |
if r: | |
with contextlib.closing(r.as_stream()) as stream: | |
result = read_exports(stream) | |
return result | |
def write_exports(self, exports): | |
""" | |
Write a dictionary of exports to a file in .ini format. | |
:param exports: A dictionary of exports, mapping an export category to | |
a list of :class:`ExportEntry` instances describing the | |
individual export entries. | |
""" | |
rf = self.get_distinfo_file(EXPORTS_FILENAME) | |
with open(rf, 'w') as f: | |
write_exports(exports, f) | |
def get_resource_path(self, relative_path): | |
""" | |
NOTE: This API may change in the future. | |
Return the absolute path to a resource file with the given relative | |
path. | |
:param relative_path: The path, relative to .dist-info, of the resource | |
of interest. | |
:return: The absolute path where the resource is to be found. | |
""" | |
r = self.get_distinfo_resource('RESOURCES') | |
with contextlib.closing(r.as_stream()) as stream: | |
with CSVReader(stream=stream) as resources_reader: | |
for relative, destination in resources_reader: | |
if relative == relative_path: | |
return destination | |
raise KeyError('no resource file with relative path %r ' | |
'is installed' % relative_path) | |
def list_installed_files(self): | |
""" | |
Iterates over the ``RECORD`` entries and returns a tuple | |
``(path, hash, size)`` for each line. | |
:returns: iterator of (path, hash, size) | |
""" | |
for result in self._get_records(): | |
yield result | |
def write_installed_files(self, paths, prefix, dry_run=False): | |
""" | |
Writes the ``RECORD`` file, using the ``paths`` iterable passed in. Any | |
existing ``RECORD`` file is silently overwritten. | |
prefix is used to determine when to write absolute paths. | |
""" | |
prefix = os.path.join(prefix, '') | |
base = os.path.dirname(self.path) | |
base_under_prefix = base.startswith(prefix) | |
base = os.path.join(base, '') | |
record_path = self.get_distinfo_file('RECORD') | |
logger.info('creating %s', record_path) | |
if dry_run: | |
return None | |
with CSVWriter(record_path) as writer: | |
for path in paths: | |
if os.path.isdir(path) or path.endswith(('.pyc', '.pyo')): | |
# do not put size and hash, as in PEP-376 | |
hash_value = size = '' | |
else: | |
size = '%d' % os.path.getsize(path) | |
with open(path, 'rb') as fp: | |
hash_value = self.get_hash(fp.read()) | |
if path.startswith(base) or (base_under_prefix and | |
path.startswith(prefix)): | |
path = os.path.relpath(path, base) | |
writer.writerow((path, hash_value, size)) | |
# add the RECORD file itself | |
if record_path.startswith(base): | |
record_path = os.path.relpath(record_path, base) | |
writer.writerow((record_path, '', '')) | |
return record_path | |
def check_installed_files(self): | |
""" | |
Checks that the hashes and sizes of the files in ``RECORD`` are | |
matched by the files themselves. Returns a (possibly empty) list of | |
mismatches. Each entry in the mismatch list will be a tuple consisting | |
of the path, 'exists', 'size' or 'hash' according to what didn't match | |
(existence is checked first, then size, then hash), the expected | |
value and the actual value. | |
""" | |
mismatches = [] | |
base = os.path.dirname(self.path) | |
record_path = self.get_distinfo_file('RECORD') | |
for path, hash_value, size in self.list_installed_files(): | |
if not os.path.isabs(path): | |
path = os.path.join(base, path) | |
if path == record_path: | |
continue | |
if not os.path.exists(path): | |
mismatches.append((path, 'exists', True, False)) | |
elif os.path.isfile(path): | |
actual_size = str(os.path.getsize(path)) | |
if size and actual_size != size: | |
mismatches.append((path, 'size', size, actual_size)) | |
elif hash_value: | |
if '=' in hash_value: | |
hasher = hash_value.split('=', 1)[0] | |
else: | |
hasher = None | |
with open(path, 'rb') as f: | |
actual_hash = self.get_hash(f.read(), hasher) | |
if actual_hash != hash_value: | |
mismatches.append((path, 'hash', hash_value, actual_hash)) | |
return mismatches | |
@cached_property | |
def shared_locations(self): | |
""" | |
A dictionary of shared locations whose keys are in the set 'prefix', | |
'purelib', 'platlib', 'scripts', 'headers', 'data' and 'namespace'. | |
The corresponding value is the absolute path of that category for | |
this distribution, and takes into account any paths selected by the | |
user at installation time (e.g. via command-line arguments). In the | |
case of the 'namespace' key, this would be a list of absolute paths | |
for the roots of namespace packages in this distribution. | |
The first time this property is accessed, the relevant information is | |
read from the SHARED file in the .dist-info directory. | |
""" | |
result = {} | |
shared_path = os.path.join(self.path, 'SHARED') | |
if os.path.isfile(shared_path): | |
with codecs.open(shared_path, 'r', encoding='utf-8') as f: | |
lines = f.read().splitlines() | |
for line in lines: | |
key, value = line.split('=', 1) | |
if key == 'namespace': | |
result.setdefault(key, []).append(value) | |
else: | |
result[key] = value | |
return result | |
def write_shared_locations(self, paths, dry_run=False): | |
""" | |
Write shared location information to the SHARED file in .dist-info. | |
:param paths: A dictionary as described in the documentation for | |
:meth:`shared_locations`. | |
:param dry_run: If True, the action is logged but no file is actually | |
written. | |
:return: The path of the file written to. | |
""" | |
shared_path = os.path.join(self.path, 'SHARED') | |
logger.info('creating %s', shared_path) | |
if dry_run: | |
return None | |
lines = [] | |
for key in ('prefix', 'lib', 'headers', 'scripts', 'data'): | |
path = paths[key] | |
if os.path.isdir(paths[key]): | |
lines.append('%s=%s' % (key, path)) | |
for ns in paths.get('namespace', ()): | |
lines.append('namespace=%s' % ns) | |
with codecs.open(shared_path, 'w', encoding='utf-8') as f: | |
f.write('\n'.join(lines)) | |
return shared_path | |
def get_distinfo_resource(self, path): | |
if path not in DIST_FILES: | |
raise DistlibException('invalid path for a dist-info file: ' | |
'%r at %r' % (path, self.path)) | |
finder = resources.finder_for_path(self.path) | |
if finder is None: | |
raise DistlibException('Unable to get a finder for %s' % self.path) | |
return finder.find(path) | |
def get_distinfo_file(self, path): | |
""" | |
Returns a path located under the ``.dist-info`` directory. Returns a | |
string representing the path. | |
:parameter path: a ``'/'``-separated path relative to the | |
``.dist-info`` directory or an absolute path; | |
If *path* is an absolute path and doesn't start | |
with the ``.dist-info`` directory path, | |
a :class:`DistlibException` is raised | |
:type path: str | |
:rtype: str | |
""" | |
# Check if it is an absolute path # XXX use relpath, add tests | |
if path.find(os.sep) >= 0: | |
# it's an absolute path? | |
distinfo_dirname, path = path.split(os.sep)[-2:] | |
if distinfo_dirname != self.path.split(os.sep)[-1]: | |
raise DistlibException( | |
'dist-info file %r does not belong to the %r %s ' | |
'distribution' % (path, self.name, self.version)) | |
# The file must be relative | |
if path not in DIST_FILES: | |
raise DistlibException('invalid path for a dist-info file: ' | |
'%r at %r' % (path, self.path)) | |
return os.path.join(self.path, path) | |
def list_distinfo_files(self): | |
""" | |
Iterates over the ``RECORD`` entries and returns paths for each line if | |
the path is pointing to a file located in the ``.dist-info`` directory | |
or one of its subdirectories. | |
:returns: iterator of paths | |
""" | |
base = os.path.dirname(self.path) | |
for path, checksum, size in self._get_records(): | |
# XXX add separator or use real relpath algo | |
if not os.path.isabs(path): | |
path = os.path.join(base, path) | |
if path.startswith(self.path): | |
yield path | |
def __eq__(self, other): | |
return (isinstance(other, InstalledDistribution) and | |
self.path == other.path) | |
# See http://docs.python.org/reference/datamodel#object.__hash__ | |
__hash__ = object.__hash__ | |
class EggInfoDistribution(BaseInstalledDistribution): | |
"""Created with the *path* of the ``.egg-info`` directory or file provided | |
to the constructor. It reads the metadata contained in the file itself, or | |
if the given path happens to be a directory, the metadata is read from the | |
file ``PKG-INFO`` under that directory.""" | |
requested = True # as we have no way of knowing, assume it was | |
shared_locations = {} | |
def __init__(self, path, env=None): | |
def set_name_and_version(s, n, v): | |
s.name = n | |
s.key = n.lower() # for case-insensitive comparisons | |
s.version = v | |
self.path = path | |
self.dist_path = env | |
if env and env._cache_enabled and path in env._cache_egg.path: | |
metadata = env._cache_egg.path[path].metadata | |
set_name_and_version(self, metadata.name, metadata.version) | |
else: | |
metadata = self._get_metadata(path) | |
# Need to be set before caching | |
set_name_and_version(self, metadata.name, metadata.version) | |
if env and env._cache_enabled: | |
env._cache_egg.add(self) | |
super(EggInfoDistribution, self).__init__(metadata, path, env) | |
def _get_metadata(self, path): | |
requires = None | |
def parse_requires_data(data): | |
"""Create a list of dependencies from a requires.txt file. | |
*data*: the contents of a setuptools-produced requires.txt file. | |
""" | |
reqs = [] | |
lines = data.splitlines() | |
for line in lines: | |
line = line.strip() | |
if line.startswith('['): | |
logger.warning('Unexpected line: quitting requirement scan: %r', | |
line) | |
break | |
r = parse_requirement(line) | |
if not r: | |
logger.warning('Not recognised as a requirement: %r', line) | |
continue | |
if r.extras: | |
logger.warning('extra requirements in requires.txt are ' | |
'not supported') | |
if not r.constraints: | |
reqs.append(r.name) | |
else: | |
cons = ', '.join('%s%s' % c for c in r.constraints) | |
reqs.append('%s (%s)' % (r.name, cons)) | |
return reqs | |
def parse_requires_path(req_path): | |
"""Create a list of dependencies from a requires.txt file. | |
*req_path*: the path to a setuptools-produced requires.txt file. | |
""" | |
reqs = [] | |
try: | |
with codecs.open(req_path, 'r', 'utf-8') as fp: | |
reqs = parse_requires_data(fp.read()) | |
except IOError: | |
pass | |
return reqs | |
if path.endswith('.egg'): | |
if os.path.isdir(path): | |
meta_path = os.path.join(path, 'EGG-INFO', 'PKG-INFO') | |
metadata = Metadata(path=meta_path, scheme='legacy') | |
req_path = os.path.join(path, 'EGG-INFO', 'requires.txt') | |
requires = parse_requires_path(req_path) | |
else: | |
# FIXME handle the case where zipfile is not available | |
zipf = zipimport.zipimporter(path) | |
fileobj = StringIO( | |
zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8')) | |
metadata = Metadata(fileobj=fileobj, scheme='legacy') | |
try: | |
data = zipf.get_data('EGG-INFO/requires.txt') | |
requires = parse_requires_data(data.decode('utf-8')) | |
except IOError: | |
requires = None | |
elif path.endswith('.egg-info'): | |
if os.path.isdir(path): | |
req_path = os.path.join(path, 'requires.txt') | |
requires = parse_requires_path(req_path) | |
path = os.path.join(path, 'PKG-INFO') | |
metadata = Metadata(path=path, scheme='legacy') | |
else: | |
raise DistlibException('path must end with .egg-info or .egg, ' | |
'got %r' % path) | |
if requires: | |
metadata.add_requirements(requires) | |
return metadata | |
def __repr__(self): | |
return '<EggInfoDistribution %r %s at %r>' % ( | |
self.name, self.version, self.path) | |
def __str__(self): | |
return "%s %s" % (self.name, self.version) | |
def check_installed_files(self): | |
""" | |
Checks that the hashes and sizes of the files in ``RECORD`` are | |
matched by the files themselves. Returns a (possibly empty) list of | |
mismatches. Each entry in the mismatch list will be a tuple consisting | |
of the path, 'exists', 'size' or 'hash' according to what didn't match | |
(existence is checked first, then size, then hash), the expected | |
value and the actual value. | |
""" | |
mismatches = [] | |
record_path = os.path.join(self.path, 'installed-files.txt') | |
if os.path.exists(record_path): | |
for path, _, _ in self.list_installed_files(): | |
if path == record_path: | |
continue | |
if not os.path.exists(path): | |
mismatches.append((path, 'exists', True, False)) | |
return mismatches | |
def list_installed_files(self): | |
""" | |
Iterates over the ``installed-files.txt`` entries and returns a tuple | |
``(path, hash, size)`` for each line. | |
:returns: a list of (path, hash, size) | |
""" | |
def _md5(path): | |
f = open(path, 'rb') | |
try: | |
content = f.read() | |
finally: | |
f.close() | |
return hashlib.md5(content).hexdigest() | |
def _size(path): | |
return os.stat(path).st_size | |
record_path = os.path.join(self.path, 'installed-files.txt') | |
result = [] | |
if os.path.exists(record_path): | |
with codecs.open(record_path, 'r', encoding='utf-8') as f: | |
for line in f: | |
line = line.strip() | |
p = os.path.normpath(os.path.join(self.path, line)) | |
# "./" is present as a marker between installed files | |
# and installation metadata files | |
if not os.path.exists(p): | |
logger.warning('Non-existent file: %s', p) | |
if p.endswith(('.pyc', '.pyo')): | |
continue | |
#otherwise fall through and fail | |
if not os.path.isdir(p): | |
result.append((p, _md5(p), _size(p))) | |
result.append((record_path, None, None)) | |
return result | |
def list_distinfo_files(self, absolute=False): | |
""" | |
Iterates over the ``installed-files.txt`` entries and returns paths for | |
each line if the path is pointing to a file located in the | |
``.egg-info`` directory or one of its subdirectories. | |
:parameter absolute: If *absolute* is ``True``, each returned path is | |
transformed into a local absolute path. Otherwise the | |
raw value from ``installed-files.txt`` is returned. | |
:type absolute: boolean | |
:returns: iterator of paths | |
""" | |
record_path = os.path.join(self.path, 'installed-files.txt') | |
skip = True | |
with codecs.open(record_path, 'r', encoding='utf-8') as f: | |
for line in f: | |
line = line.strip() | |
if line == './': | |
skip = False | |
continue | |
if not skip: | |
p = os.path.normpath(os.path.join(self.path, line)) | |
if p.startswith(self.path): | |
if absolute: | |
yield p | |
else: | |
yield line | |
def __eq__(self, other): | |
return (isinstance(other, EggInfoDistribution) and | |
self.path == other.path) | |
# See http://docs.python.org/reference/datamodel#object.__hash__ | |
__hash__ = object.__hash__ | |
new_dist_class = InstalledDistribution | |
old_dist_class = EggInfoDistribution | |
class DependencyGraph(object): | |
""" | |
Represents a dependency graph between distributions. | |
The dependency relationships are stored in an ``adjacency_list`` that maps | |
distributions to a list of ``(other, label)`` tuples where ``other`` | |
is a distribution and the edge is labeled with ``label`` (i.e. the version | |
specifier, if such was provided). Also, for more efficient traversal, for | |
every distribution ``x``, a list of predecessors is kept in | |
``reverse_list[x]``. An edge from distribution ``a`` to | |
distribution ``b`` means that ``a`` depends on ``b``. If any missing | |
dependencies are found, they are stored in ``missing``, which is a | |
dictionary that maps distributions to a list of requirements that were not | |
provided by any other distributions. | |
""" | |
def __init__(self): | |
self.adjacency_list = {} | |
self.reverse_list = {} | |
self.missing = {} | |
def add_distribution(self, distribution): | |
"""Add the *distribution* to the graph. | |
:type distribution: :class:`distutils2.database.InstalledDistribution` | |
or :class:`distutils2.database.EggInfoDistribution` | |
""" | |
self.adjacency_list[distribution] = [] | |
self.reverse_list[distribution] = [] | |
#self.missing[distribution] = [] | |
def add_edge(self, x, y, label=None): | |
"""Add an edge from distribution *x* to distribution *y* with the given | |
*label*. | |
:type x: :class:`distutils2.database.InstalledDistribution` or | |
:class:`distutils2.database.EggInfoDistribution` | |
:type y: :class:`distutils2.database.InstalledDistribution` or | |
:class:`distutils2.database.EggInfoDistribution` | |
:type label: ``str`` or ``None`` | |
""" | |
self.adjacency_list[x].append((y, label)) | |
# multiple edges are allowed, so be careful | |
if x not in self.reverse_list[y]: | |
self.reverse_list[y].append(x) | |
def add_missing(self, distribution, requirement): | |
""" | |
Add a missing *requirement* for the given *distribution*. | |
:type distribution: :class:`distutils2.database.InstalledDistribution` | |
or :class:`distutils2.database.EggInfoDistribution` | |
:type requirement: ``str`` | |
""" | |
logger.debug('%s missing %r', distribution, requirement) | |
self.missing.setdefault(distribution, []).append(requirement) | |
def _repr_dist(self, dist): | |
return '%s %s' % (dist.name, dist.version) | |
def repr_node(self, dist, level=1): | |
"""Prints only a subgraph""" | |
output = [self._repr_dist(dist)] | |
for other, label in self.adjacency_list[dist]: | |
dist = self._repr_dist(other) | |
if label is not None: | |
dist = '%s [%s]' % (dist, label) | |
output.append(' ' * level + str(dist)) | |
suboutput = self.repr_node(other, level + 1) | |
subs = suboutput.split('\n') | |
output.extend(subs[1:]) | |
return '\n'.join(output) | |
def to_dot(self, f, skip_disconnected=True): | |
"""Writes a DOT output for the graph to the provided file *f*. | |
If *skip_disconnected* is set to ``True``, then all distributions | |
that are not dependent on any other distribution are skipped. | |
:type f: has to support ``file``-like operations | |
:type skip_disconnected: ``bool`` | |
""" | |
disconnected = [] | |
f.write("digraph dependencies {\n") | |
for dist, adjs in self.adjacency_list.items(): | |
if len(adjs) == 0 and not skip_disconnected: | |
disconnected.append(dist) | |
for other, label in adjs: | |
if label is not None: | |
f.write('"%s" -> "%s" [label="%s"]\n' % | |
(dist.name, other.name, label)) | |
else: | |
f.write('"%s" -> "%s"\n' % (dist.name, other.name)) | |
if not skip_disconnected and len(disconnected) > 0: | |
f.write('subgraph disconnected {\n') | |
f.write('label = "Disconnected"\n') | |
f.write('bgcolor = red\n') | |
for dist in disconnected: | |
f.write('"%s"' % dist.name) | |
f.write('\n') | |
f.write('}\n') | |
f.write('}\n') | |
def topological_sort(self): | |
""" | |
Perform a topological sort of the graph. | |
:return: A tuple, the first element of which is a topologically sorted | |
list of distributions, and the second element of which is a | |
list of distributions that cannot be sorted because they have | |
circular dependencies and so form a cycle. | |
""" | |
result = [] | |
# Make a shallow copy of the adjacency list | |
alist = {} | |
for k, v in self.adjacency_list.items(): | |
alist[k] = v[:] | |
while True: | |
# See what we can remove in this run | |
to_remove = [] | |
for k, v in list(alist.items())[:]: | |
if not v: | |
to_remove.append(k) | |
del alist[k] | |
if not to_remove: | |
# What's left in alist (if anything) is a cycle. | |
break | |
# Remove from the adjacency list of others | |
for k, v in alist.items(): | |
alist[k] = [(d, r) for d, r in v if d not in to_remove] | |
logger.debug('Moving to result: %s', | |
['%s (%s)' % (d.name, d.version) for d in to_remove]) | |
result.extend(to_remove) | |
return result, list(alist.keys()) | |
def __repr__(self): | |
"""Representation of the graph""" | |
output = [] | |
for dist, adjs in self.adjacency_list.items(): | |
output.append(self.repr_node(dist)) | |
return '\n'.join(output) | |
def make_graph(dists, scheme='default'): | |
"""Makes a dependency graph from the given distributions. | |
:parameter dists: a list of distributions | |
:type dists: list of :class:`distutils2.database.InstalledDistribution` and | |
:class:`distutils2.database.EggInfoDistribution` instances | |
:rtype: a :class:`DependencyGraph` instance | |
""" | |
scheme = get_scheme(scheme) | |
graph = DependencyGraph() | |
provided = {} # maps names to lists of (version, dist) tuples | |
# first, build the graph and find out what's provided | |
for dist in dists: | |
graph.add_distribution(dist) | |
for p in dist.provides: | |
name, version = parse_name_and_version(p) | |
logger.debug('Add to provided: %s, %s, %s', name, version, dist) | |
provided.setdefault(name, []).append((version, dist)) | |
# now make the edges | |
for dist in dists: | |
requires = (dist.run_requires | dist.meta_requires | | |
dist.build_requires | dist.dev_requires) | |
for req in requires: | |
try: | |
matcher = scheme.matcher(req) | |
except UnsupportedVersionError: | |
# XXX compat-mode if cannot read the version | |
logger.warning('could not read version %r - using name only', | |
req) | |
name = req.split()[0] | |
matcher = scheme.matcher(name) | |
name = matcher.key # case-insensitive | |
matched = False | |
if name in provided: | |
for version, provider in provided[name]: | |
try: | |
match = matcher.match(version) | |
except UnsupportedVersionError: | |
match = False | |
if match: | |
graph.add_edge(dist, provider, req) | |
matched = True | |
break | |
if not matched: | |
graph.add_missing(dist, req) | |
return graph | |
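# --- Editor's note: illustrative sketch, not part of the original module. ---
# An assumed example of make_graph and DependencyGraph.topological_sort: build
# the graph from whatever is installed, then order distributions so that each
# one appears after the distributions it depends on. Wrapped in a function so
# nothing runs at import time.
def _dependency_graph_sketch():
    dists = list(DistributionPath(include_egg=True).get_distributions())
    graph = make_graph(dists)
    ordered, cyclic = graph.topological_sort()
    # 'ordered' lists dependencies before their dependents; 'cyclic' holds any
    # distributions that could not be ordered because they form a cycle.
    unmet = graph.missing   # maps a distribution to its unmatched requirements
    return ordered, cyclic, unmet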
def get_dependent_dists(dists, dist): | |
"""Recursively generate a list of distributions from *dists* that are | |
dependent on *dist*. | |
:param dists: a list of distributions | |
:param dist: a distribution, member of *dists* for which we are interested | |
""" | |
if dist not in dists: | |
raise DistlibException('given distribution %r is not a member ' | |
'of the list' % dist.name) | |
graph = make_graph(dists) | |
dep = [dist] # dependent distributions | |
todo = graph.reverse_list[dist] # list of nodes we should inspect | |
while todo: | |
d = todo.pop() | |
dep.append(d) | |
for succ in graph.reverse_list[d]: | |
if succ not in dep: | |
todo.append(succ) | |
dep.pop(0) # remove dist from dep, was there to prevent infinite loops | |
return dep | |
def get_required_dists(dists, dist): | |
"""Recursively generate a list of distributions from *dists* that are | |
required by *dist*. | |
:param dists: a list of distributions | |
:param dist: a distribution, member of *dists* for which we are interested | |
""" | |
if dist not in dists: | |
raise DistlibException('given distribution %r is not a member ' | |
'of the list' % dist.name) | |
graph = make_graph(dists) | |
req = [] # required distributions | |
todo = graph.adjacency_list[dist] # list of nodes we should inspect | |
while todo: | |
d = todo.pop()[0] | |
req.append(d) | |
for pred in graph.adjacency_list[d]: | |
if pred not in req: | |
todo.append(pred) | |
return req | |
def make_dist(name, version, **kwargs): | |
""" | |
A convenience function for making a dist given just a name and version. | |
""" | |
summary = kwargs.pop('summary', 'Placeholder for summary') | |
md = Metadata(**kwargs) | |
md.name = name | |
md.version = version | |
md.summary = summary or 'Placeholder for summary' | |
return Distribution(md) |
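# --- Editor's note: illustrative sketch, not part of the original module. ---
# An assumed example of make_dist and Distribution.matches_requirement: build a
# placeholder distribution and check it against a requirement string. The name
# and versions are invented for illustration. Wrapped in a function so nothing
# runs at import time.
def _make_dist_sketch():
    dist = make_dist('example-project', '1.4.0', summary='An example distribution')
    # name_and_version renders the distribution as "name (version)".
    assert dist.name_and_version == 'example-project (1.4.0)'
    # matches_requirement() checks the requirement against the 'provides' list.
    return dist.matches_requirement('example-project (>= 1.0)')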
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2013 Vinay Sajip. | |
# Licensed to the Python Software Foundation under a contributor agreement. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
import hashlib | |
import logging | |
import os | |
import shutil | |
import subprocess | |
import tempfile | |
try: | |
from threading import Thread | |
except ImportError: | |
from dummy_threading import Thread | |
from . import DistlibException | |
from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr, | |
urlparse, build_opener, string_types) | |
from .util import cached_property, zip_dir, ServerProxy | |
logger = logging.getLogger(__name__) | |
DEFAULT_INDEX = 'https://pypi.python.org/pypi' | |
DEFAULT_REALM = 'pypi' | |
class PackageIndex(object): | |
""" | |
This class represents a package index compatible with PyPI, the Python | |
Package Index. | |
""" | |
boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$' | |
def __init__(self, url=None): | |
""" | |
Initialise an instance. | |
:param url: The URL of the index. If not specified, the URL for PyPI is | |
used. | |
""" | |
self.url = url or DEFAULT_INDEX | |
self.read_configuration() | |
scheme, netloc, path, params, query, frag = urlparse(self.url) | |
if params or query or frag or scheme not in ('http', 'https'): | |
raise DistlibException('invalid repository: %s' % self.url) | |
self.password_handler = None | |
self.ssl_verifier = None | |
self.gpg = None | |
self.gpg_home = None | |
self.rpc_proxy = None | |
with open(os.devnull, 'w') as sink: | |
# Use gpg by default rather than gpg2, as gpg2 insists on | |
# prompting for passwords | |
for s in ('gpg', 'gpg2'): | |
try: | |
rc = subprocess.check_call([s, '--version'], stdout=sink, | |
stderr=sink) | |
if rc == 0: | |
self.gpg = s | |
break | |
except OSError: | |
pass | |
def _get_pypirc_command(self): | |
""" | |
Get the distutils command for interacting with PyPI configurations. | |
:return: the command. | |
""" | |
from distutils.core import Distribution | |
from distutils.config import PyPIRCCommand | |
d = Distribution() | |
return PyPIRCCommand(d) | |
def read_configuration(self): | |
""" | |
Read the PyPI access configuration as supported by distutils, getting | |
PyPI to do the actual work. This populates ``username``, ``password``, | |
``realm`` and ``url`` attributes from the configuration. | |
""" | |
# get distutils to do the work | |
c = self._get_pypirc_command() | |
c.repository = self.url | |
cfg = c._read_pypirc() | |
self.username = cfg.get('username') | |
self.password = cfg.get('password') | |
self.realm = cfg.get('realm', 'pypi') | |
self.url = cfg.get('repository', self.url) | |
def save_configuration(self): | |
""" | |
Save the PyPI access configuration. You must have set ``username`` and | |
``password`` attributes before calling this method. | |
Again, distutils is used to do the actual work. | |
""" | |
self.check_credentials() | |
# get distutils to do the work | |
c = self._get_pypirc_command() | |
c._store_pypirc(self.username, self.password) | |
def check_credentials(self): | |
""" | |
Check that ``username`` and ``password`` have been set, and raise an | |
exception if not. | |
""" | |
if self.username is None or self.password is None: | |
raise DistlibException('username and password must be set') | |
pm = HTTPPasswordMgr() | |
_, netloc, _, _, _, _ = urlparse(self.url) | |
pm.add_password(self.realm, netloc, self.username, self.password) | |
self.password_handler = HTTPBasicAuthHandler(pm) | |
def register(self, metadata): | |
""" | |
Register a distribution on PyPI, using the provided metadata. | |
:param metadata: A :class:`Metadata` instance defining at least a name | |
and version number for the distribution to be | |
registered. | |
:return: The HTTP response received from PyPI upon submission of the | |
request. | |
""" | |
self.check_credentials() | |
metadata.validate() | |
d = metadata.todict() | |
d[':action'] = 'verify' | |
request = self.encode_request(d.items(), []) | |
response = self.send_request(request) | |
d[':action'] = 'submit' | |
request = self.encode_request(d.items(), []) | |
return self.send_request(request) | |
def _reader(self, name, stream, outbuf): | |
""" | |
Thread runner for reading lines of output from a subprocess into a buffer. | |
:param name: The logical name of the stream (used for logging only). | |
:param stream: The stream to read from. This will typically be a pipe | |
connected to the output stream of a subprocess. | |
:param outbuf: The list to append the read lines to. | |
""" | |
while True: | |
s = stream.readline() | |
if not s: | |
break | |
s = s.decode('utf-8').rstrip() | |
outbuf.append(s) | |
logger.debug('%s: %s' % (name, s)) | |
stream.close() | |
def get_sign_command(self, filename, signer, sign_password, | |
keystore=None): | |
""" | |
Return a suitable command for signing a file. | |
:param filename: The pathname to the file to be signed. | |
:param signer: The identifier of the signer of the file. | |
:param sign_password: The passphrase for the signer's | |
private key used for signing. | |
:param keystore: The path to a directory which contains the keys | |
used in verification. If not specified, the | |
instance's ``gpg_home`` attribute is used instead. | |
:return: The signing command as a list suitable to be | |
passed to :class:`subprocess.Popen`. | |
""" | |
cmd = [self.gpg, '--status-fd', '2', '--no-tty'] | |
if keystore is None: | |
keystore = self.gpg_home | |
if keystore: | |
cmd.extend(['--homedir', keystore]) | |
if sign_password is not None: | |
cmd.extend(['--batch', '--passphrase-fd', '0']) | |
td = tempfile.mkdtemp() | |
sf = os.path.join(td, os.path.basename(filename) + '.asc') | |
cmd.extend(['--detach-sign', '--armor', '--local-user', | |
signer, '--output', sf, filename]) | |
logger.debug('invoking: %s', ' '.join(cmd)) | |
return cmd, sf | |
def run_command(self, cmd, input_data=None): | |
""" | |
Run a command in a child process, passing it any input data specified. | |
:param cmd: The command to run. | |
:param input_data: If specified, this must be a byte string containing | |
data to be sent to the child process. | |
:return: A tuple consisting of the subprocess' exit code, a list of | |
lines read from the subprocess' ``stdout``, and a list of | |
lines read from the subprocess' ``stderr``. | |
""" | |
kwargs = { | |
'stdout': subprocess.PIPE, | |
'stderr': subprocess.PIPE, | |
} | |
if input_data is not None: | |
kwargs['stdin'] = subprocess.PIPE | |
stdout = [] | |
stderr = [] | |
p = subprocess.Popen(cmd, **kwargs) | |
# We don't use communicate() here because we may need to | |
# get clever with interacting with the command | |
t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout)) | |
t1.start() | |
t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr)) | |
t2.start() | |
if input_data is not None: | |
p.stdin.write(input_data) | |
p.stdin.close() | |
p.wait() | |
t1.join() | |
t2.join() | |
return p.returncode, stdout, stderr | |
def sign_file(self, filename, signer, sign_password, keystore=None): | |
""" | |
Sign a file. | |
:param filename: The pathname to the file to be signed. | |
:param signer: The identifier of the signer of the file. | |
:param sign_password: The passphrase for the signer's | |
private key used for signing. | |
:param keystore: The path to a directory which contains the keys | |
used in signing. If not specified, the instance's | |
``gpg_home`` attribute is used instead. | |
:return: The absolute pathname of the file where the signature is | |
stored. | |
""" | |
cmd, sig_file = self.get_sign_command(filename, signer, sign_password, | |
keystore) | |
rc, stdout, stderr = self.run_command(cmd, | |
sign_password.encode('utf-8')) | |
if rc != 0: | |
raise DistlibException('sign command failed with error ' | |
'code %s' % rc) | |
return sig_file | |
def upload_file(self, metadata, filename, signer=None, sign_password=None, | |
filetype='sdist', pyversion='source', keystore=None): | |
""" | |
Upload a release file to the index. | |
:param metadata: A :class:`Metadata` instance defining at least a name | |
and version number for the file to be uploaded. | |
:param filename: The pathname of the file to be uploaded. | |
:param signer: The identifier of the signer of the file. | |
:param sign_password: The passphrase for the signer's | |
private key used for signing. | |
:param filetype: The type of the file being uploaded. This is the | |
distutils command which produced that file, e.g. | |
``sdist`` or ``bdist_wheel``. | |
:param pyversion: The version of Python which the release relates | |
to. For code compatible with any Python, this would | |
be ``source``, otherwise it would be e.g. ``3.2``. | |
:param keystore: The path to a directory which contains the keys | |
used in signing. If not specified, the instance's | |
``gpg_home`` attribute is used instead. | |
:return: The HTTP response received from PyPI upon submission of the | |
request. | |
""" | |
self.check_credentials() | |
if not os.path.exists(filename): | |
raise DistlibException('not found: %s' % filename) | |
metadata.validate() | |
d = metadata.todict() | |
sig_file = None | |
if signer: | |
if not self.gpg: | |
logger.warning('no signing program available - not signed') | |
else: | |
sig_file = self.sign_file(filename, signer, sign_password, | |
keystore) | |
with open(filename, 'rb') as f: | |
file_data = f.read() | |
md5_digest = hashlib.md5(file_data).hexdigest() | |
sha256_digest = hashlib.sha256(file_data).hexdigest() | |
d.update({ | |
':action': 'file_upload', | |
'protocol_version': '1', | |
'filetype': filetype, | |
'pyversion': pyversion, | |
'md5_digest': md5_digest, | |
'sha256_digest': sha256_digest, | |
}) | |
files = [('content', os.path.basename(filename), file_data)] | |
if sig_file: | |
with open(sig_file, 'rb') as f: | |
sig_data = f.read() | |
files.append(('gpg_signature', os.path.basename(sig_file), | |
sig_data)) | |
shutil.rmtree(os.path.dirname(sig_file)) | |
request = self.encode_request(d.items(), files) | |
return self.send_request(request) | |
def upload_documentation(self, metadata, doc_dir): | |
""" | |
Upload documentation to the index. | |
:param metadata: A :class:`Metadata` instance defining at least a name | |
and version number for the documentation to be | |
uploaded. | |
:param doc_dir: The pathname of the directory which contains the | |
documentation. This should be the directory that | |
contains the ``index.html`` for the documentation. | |
:return: The HTTP response received from PyPI upon submission of the | |
request. | |
""" | |
self.check_credentials() | |
if not os.path.isdir(doc_dir): | |
raise DistlibException('not a directory: %r' % doc_dir) | |
fn = os.path.join(doc_dir, 'index.html') | |
if not os.path.exists(fn): | |
raise DistlibException('not found: %r' % fn) | |
metadata.validate() | |
name, version = metadata.name, metadata.version | |
zip_data = zip_dir(doc_dir).getvalue() | |
fields = [(':action', 'doc_upload'), | |
('name', name), ('version', version)] | |
files = [('content', name, zip_data)] | |
request = self.encode_request(fields, files) | |
return self.send_request(request) | |
def get_verify_command(self, signature_filename, data_filename, | |
keystore=None): | |
""" | |
Return a suitable command for verifying a file. | |
:param signature_filename: The pathname to the file containing the | |
signature. | |
:param data_filename: The pathname to the file containing the | |
signed data. | |
:param keystore: The path to a directory which contains the keys | |
used in verification. If not specified, the | |
instance's ``gpg_home`` attribute is used instead. | |
:return: The verifying command as a list suitable to be | |
passed to :class:`subprocess.Popen`. | |
""" | |
cmd = [self.gpg, '--status-fd', '2', '--no-tty'] | |
if keystore is None: | |
keystore = self.gpg_home | |
if keystore: | |
cmd.extend(['--homedir', keystore]) | |
cmd.extend(['--verify', signature_filename, data_filename]) | |
logger.debug('invoking: %s', ' '.join(cmd)) | |
return cmd | |
def verify_signature(self, signature_filename, data_filename, | |
keystore=None): | |
""" | |
Verify a signature for a file. | |
:param signature_filename: The pathname to the file containing the | |
signature. | |
:param data_filename: The pathname to the file containing the | |
signed data. | |
:param keystore: The path to a directory which contains the keys | |
used in verification. If not specified, the | |
instance's ``gpg_home`` attribute is used instead. | |
:return: True if the signature was verified, else False. | |
""" | |
if not self.gpg: | |
raise DistlibException('verification unavailable because gpg ' | |
'unavailable') | |
cmd = self.get_verify_command(signature_filename, data_filename, | |
keystore) | |
rc, stdout, stderr = self.run_command(cmd) | |
if rc not in (0, 1): | |
raise DistlibException('verify command failed with error ' | |
'code %s' % rc) | |
return rc == 0 | |
def download_file(self, url, destfile, digest=None, reporthook=None): | |
""" | |
This is a convenience method for downloading a file from an URL. | |
Normally, this will be a file from the index, though currently | |
no check is made for this (i.e. a file can be downloaded from | |
anywhere). | |
The method is just like the :func:`urlretrieve` function in the | |
standard library, except that it allows digest computation to be | |
done during download and a check that the downloaded data | |
matches any expected value. | |
:param url: The URL of the file to be downloaded (assumed to be | |
available via an HTTP GET request). | |
:param destfile: The pathname where the downloaded file is to be | |
saved. | |
:param digest: If specified, this must be a (hasher, value) | |
tuple, where hasher is the algorithm used (e.g. | |
``'md5'``) and ``value`` is the expected value. | |
:param reporthook: The same as for :func:`urlretrieve` in the | |
standard library. | |
""" | |
if digest is None: | |
digester = None | |
logger.debug('No digest specified') | |
else: | |
if isinstance(digest, (list, tuple)): | |
hasher, digest = digest | |
else: | |
hasher = 'md5' | |
digester = getattr(hashlib, hasher)() | |
logger.debug('Digest specified: %s' % digest) | |
# The following code is equivalent to urlretrieve. | |
# We need to do it this way so that we can compute the | |
# digest of the file as we go. | |
with open(destfile, 'wb') as dfp: | |
# addinfourl is not a context manager on 2.x | |
# so we have to use try/finally | |
sfp = self.send_request(Request(url)) | |
try: | |
headers = sfp.info() | |
blocksize = 8192 | |
size = -1 | |
read = 0 | |
blocknum = 0 | |
if "content-length" in headers: | |
size = int(headers["Content-Length"]) | |
if reporthook: | |
reporthook(blocknum, blocksize, size) | |
while True: | |
block = sfp.read(blocksize) | |
if not block: | |
break | |
read += len(block) | |
dfp.write(block) | |
if digester: | |
digester.update(block) | |
blocknum += 1 | |
if reporthook: | |
reporthook(blocknum, blocksize, size) | |
finally: | |
sfp.close() | |
# check that we got the whole file, if we can | |
if size >= 0 and read < size: | |
raise DistlibException( | |
'retrieval incomplete: got only %d out of %d bytes' | |
% (read, size)) | |
# if we have a digest, it must match. | |
if digester: | |
actual = digester.hexdigest() | |
if digest != actual: | |
raise DistlibException('%s digest mismatch for %s: expected ' | |
'%s, got %s' % (hasher, destfile, | |
digest, actual)) | |
logger.debug('Digest verified: %s', digest) | |
def send_request(self, req): | |
""" | |
Send a standard library :class:`Request` to PyPI and return its | |
response. | |
:param req: The request to send. | |
:return: The HTTP response from PyPI (a standard library HTTPResponse). | |
""" | |
handlers = [] | |
if self.password_handler: | |
handlers.append(self.password_handler) | |
if self.ssl_verifier: | |
handlers.append(self.ssl_verifier) | |
opener = build_opener(*handlers) | |
return opener.open(req) | |
def encode_request(self, fields, files): | |
""" | |
Encode fields and files for posting to an HTTP server. | |
:param fields: The fields to send as a list of (fieldname, value) | |
tuples. | |
:param files: The files to send as a list of (fieldname, filename, | |
file_bytes) tuples. | |
""" | |
# Adapted from packaging, which in turn was adapted from | |
# http://code.activestate.com/recipes/146306 | |
parts = [] | |
boundary = self.boundary | |
for k, values in fields: | |
if not isinstance(values, (list, tuple)): | |
values = [values] | |
for v in values: | |
parts.extend(( | |
b'--' + boundary, | |
('Content-Disposition: form-data; name="%s"' % | |
k).encode('utf-8'), | |
b'', | |
v.encode('utf-8'))) | |
for key, filename, value in files: | |
parts.extend(( | |
b'--' + boundary, | |
('Content-Disposition: form-data; name="%s"; filename="%s"' % | |
(key, filename)).encode('utf-8'), | |
b'', | |
value)) | |
parts.extend((b'--' + boundary + b'--', b'')) | |
body = b'\r\n'.join(parts) | |
ct = b'multipart/form-data; boundary=' + boundary | |
headers = { | |
'Content-type': ct, | |
'Content-length': str(len(body)) | |
} | |
return Request(self.url, body, headers) | |
def search(self, terms, operator=None): | |
if isinstance(terms, string_types): | |
terms = {'name': terms} | |
if self.rpc_proxy is None: | |
self.rpc_proxy = ServerProxy(self.url, timeout=3.0) | |
return self.rpc_proxy.search(terms, operator or 'and') |
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2012-2015 Vinay Sajip. | |
# Licensed to the Python Software Foundation under a contributor agreement. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
import gzip | |
from io import BytesIO | |
import json | |
import logging | |
import os | |
import posixpath | |
import re | |
try: | |
import threading | |
except ImportError: # pragma: no cover | |
import dummy_threading as threading | |
import zlib | |
from . import DistlibException | |
from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url, | |
queue, quote, unescape, string_types, build_opener, | |
HTTPRedirectHandler as BaseRedirectHandler, text_type, | |
Request, HTTPError, URLError) | |
from .database import Distribution, DistributionPath, make_dist | |
from .metadata import Metadata | |
from .util import (cached_property, parse_credentials, ensure_slash, | |
split_filename, get_project_data, parse_requirement, | |
parse_name_and_version, ServerProxy, normalize_name) | |
from .version import get_scheme, UnsupportedVersionError | |
from .wheel import Wheel, is_compatible | |
logger = logging.getLogger(__name__) | |
HASHER_HASH = re.compile(r'^(\w+)=([a-f0-9]+)') | |
CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I) | |
HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml') | |
DEFAULT_INDEX = 'https://pypi.python.org/pypi' | |
def get_all_distribution_names(url=None): | |
""" | |
Return all distribution names known by an index. | |
:param url: The URL of the index. | |
:return: A list of all known distribution names. | |
""" | |
if url is None: | |
url = DEFAULT_INDEX | |
client = ServerProxy(url, timeout=3.0) | |
return client.list_packages() | |
class RedirectHandler(BaseRedirectHandler): | |
""" | |
A class to work around a bug in some Python 3.2.x releases. | |
""" | |
# There's a bug in the base version for some 3.2.x | |
# (e.g. 3.2.2 on Ubuntu Oneiric). If a Location header | |
# returns e.g. /abc, it bails because it says the scheme '' | |
# is bogus, when actually it should use the request's | |
# URL for the scheme. See Python issue #13696. | |
def http_error_302(self, req, fp, code, msg, headers): | |
# Some servers (incorrectly) return multiple Location headers | |
# (so probably same goes for URI). Use first header. | |
newurl = None | |
for key in ('location', 'uri'): | |
if key in headers: | |
newurl = headers[key] | |
break | |
if newurl is None: | |
return | |
urlparts = urlparse(newurl) | |
if urlparts.scheme == '': | |
newurl = urljoin(req.get_full_url(), newurl) | |
if hasattr(headers, 'replace_header'): | |
headers.replace_header(key, newurl) | |
else: | |
headers[key] = newurl | |
return BaseRedirectHandler.http_error_302(self, req, fp, code, msg, | |
headers) | |
http_error_301 = http_error_303 = http_error_307 = http_error_302 | |
class Locator(object): | |
""" | |
A base class for locators - things that locate distributions. | |
""" | |
source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz') | |
binary_extensions = ('.egg', '.exe', '.whl') | |
excluded_extensions = ('.pdf',) | |
# A list of tags indicating which wheels you want to match. The default | |
# value of None matches against the tags compatible with the running | |
# Python. If you want to match other values, set wheel_tags on a locator | |
# instance to a list of tuples (pyver, abi, arch) which you want to match. | |
wheel_tags = None | |
downloadable_extensions = source_extensions + ('.whl',) | |
def __init__(self, scheme='default'): | |
""" | |
Initialise an instance. | |
:param scheme: Because locators look for most recent versions, they | |
need to know the version scheme to use. This specifies | |
the current PEP-recommended scheme - use ``'legacy'`` | |
if you need to support existing distributions on PyPI. | |
""" | |
self._cache = {} | |
self.scheme = scheme | |
# Because of bugs in some of the handlers on some of the platforms, | |
# we use our own opener rather than just using urlopen. | |
self.opener = build_opener(RedirectHandler()) | |
# If get_project() is called from locate(), the matcher instance | |
# is set from the requirement passed to locate(). See issue #18 for | |
# why this can be useful to know. | |
self.matcher = None | |
self.errors = queue.Queue() | |
def get_errors(self): | |
""" | |
Return any errors which have occurred. | |
""" | |
result = [] | |
while not self.errors.empty(): # pragma: no cover | |
try: | |
e = self.errors.get(False) | |
result.append(e) | |
except queue.Empty: | |
continue | |
self.errors.task_done() | |
return result | |
def clear_errors(self): | |
""" | |
Clear any errors which may have been logged. | |
""" | |
# Just get the errors and throw them away | |
self.get_errors() | |
def clear_cache(self): | |
self._cache.clear() | |
def _get_scheme(self): | |
return self._scheme | |
def _set_scheme(self, value): | |
self._scheme = value | |
scheme = property(_get_scheme, _set_scheme) | |
def _get_project(self, name): | |
""" | |
For a given project, get a dictionary mapping available versions to Distribution | |
instances. | |
This should be implemented in subclasses. | |
If called from a locate() request, self.matcher will be set to a | |
matcher for the requirement to satisfy, otherwise it will be None. | |
""" | |
raise NotImplementedError('Please implement in the subclass') | |
def get_distribution_names(self): | |
""" | |
Return all the distribution names known to this locator. | |
""" | |
raise NotImplementedError('Please implement in the subclass') | |
def get_project(self, name): | |
""" | |
For a given project, get a dictionary mapping available versions to Distribution | |
instances. | |
This calls _get_project to do all the work, and just implements a caching layer on top. | |
""" | |
if self._cache is None: | |
result = self._get_project(name) | |
elif name in self._cache: | |
result = self._cache[name] | |
else: | |
self.clear_errors() | |
result = self._get_project(name) | |
self._cache[name] = result | |
return result | |
def score_url(self, url): | |
""" | |
Give an url a score which can be used to choose preferred URLs | |
for a given project release. | |
""" | |
t = urlparse(url) | |
basename = posixpath.basename(t.path) | |
compatible = True | |
is_wheel = basename.endswith('.whl') | |
if is_wheel: | |
compatible = is_compatible(Wheel(basename), self.wheel_tags) | |
return (t.scheme == 'https', 'pypi.python.org' in t.netloc, | |
is_wheel, compatible, basename) | |
def prefer_url(self, url1, url2): | |
""" | |
Choose one of two URLs where both are candidates for distribution | |
archives for the same version of a distribution (for example, | |
.tar.gz vs. zip). | |
The current implementation favours https:// URLs over http://, archives | |
from PyPI over those from other locations, wheel compatibility (if a | |
wheel) and then the archive name. | |
""" | |
result = url2 | |
if url1: | |
s1 = self.score_url(url1) | |
s2 = self.score_url(url2) | |
if s1 > s2: | |
result = url1 | |
if result != url2: | |
logger.debug('Not replacing %r with %r', url1, url2) | |
else: | |
logger.debug('Replacing %r with %r', url1, url2) | |
return result | |
def split_filename(self, filename, project_name): | |
""" | |
Attempt to split a filename into project name, version and Python version. | |
""" | |
return split_filename(filename, project_name) | |
def convert_url_to_download_info(self, url, project_name): | |
""" | |
See if a URL is a candidate for a download URL for a project (the URL | |
has typically been scraped from an HTML page). | |
If it is, a dictionary is returned with keys "name", "version", | |
"filename" and "url"; otherwise, None is returned. | |
""" | |
def same_project(name1, name2): | |
return normalize_name(name1) == normalize_name(name2) | |
result = None | |
scheme, netloc, path, params, query, frag = urlparse(url) | |
if frag.lower().startswith('egg='): | |
logger.debug('%s: version hint in fragment: %r', | |
project_name, frag) | |
m = HASHER_HASH.match(frag) | |
if m: | |
algo, digest = m.groups() | |
else: | |
algo, digest = None, None | |
origpath = path | |
if path and path[-1] == '/': | |
path = path[:-1] | |
if path.endswith('.whl'): | |
try: | |
wheel = Wheel(path) | |
if is_compatible(wheel, self.wheel_tags): | |
if project_name is None: | |
include = True | |
else: | |
include = same_project(wheel.name, project_name) | |
if include: | |
result = { | |
'name': wheel.name, | |
'version': wheel.version, | |
'filename': wheel.filename, | |
'url': urlunparse((scheme, netloc, origpath, | |
params, query, '')), | |
'python-version': ', '.join( | |
['.'.join(list(v[2:])) for v in wheel.pyver]), | |
} | |
except Exception as e: # pragma: no cover | |
logger.warning('invalid path for wheel: %s', path) | |
elif path.endswith(self.downloadable_extensions): | |
path = filename = posixpath.basename(path) | |
for ext in self.downloadable_extensions: | |
if path.endswith(ext): | |
path = path[:-len(ext)] | |
t = self.split_filename(path, project_name) | |
if not t: | |
logger.debug('No match for project/version: %s', path) | |
else: | |
name, version, pyver = t | |
if not project_name or same_project(project_name, name): | |
result = { | |
'name': name, | |
'version': version, | |
'filename': filename, | |
'url': urlunparse((scheme, netloc, origpath, | |
params, query, '')), | |
#'packagetype': 'sdist', | |
} | |
if pyver: | |
result['python-version'] = pyver | |
break | |
if result and algo: | |
result['%s_digest' % algo] = digest | |
return result | |
def _get_digest(self, info): | |
""" | |
Get a digest from a dictionary by looking at keys of the form | |
'algo_digest'. | |
Returns a 2-tuple (algo, digest) if found, else None. Currently | |
looks only for SHA256, then MD5. | |
""" | |
result = None | |
for algo in ('sha256', 'md5'): | |
key = '%s_digest' % algo | |
if key in info: | |
result = (algo, info[key]) | |
break | |
return result | |
def _update_version_data(self, result, info): | |
""" | |
Update a result dictionary (the final result from _get_project) with a | |
dictionary for a specific version, which typically holds information | |
gleaned from a filename or URL for an archive for the distribution. | |
""" | |
name = info.pop('name') | |
version = info.pop('version') | |
if version in result: | |
dist = result[version] | |
md = dist.metadata | |
else: | |
dist = make_dist(name, version, scheme=self.scheme) | |
md = dist.metadata | |
dist.digest = digest = self._get_digest(info) | |
url = info['url'] | |
result['digests'][url] = digest | |
if md.source_url != info['url']: | |
md.source_url = self.prefer_url(md.source_url, url) | |
result['urls'].setdefault(version, set()).add(url) | |
dist.locator = self | |
result[version] = dist | |
def locate(self, requirement, prereleases=False): | |
""" | |
Find the most recent distribution which matches the given | |
requirement. | |
:param requirement: A requirement of the form 'foo (1.0)' or perhaps | |
'foo (>= 1.0, < 2.0, != 1.3)' | |
:param prereleases: If ``True``, allow pre-release versions | |
to be located. Otherwise, pre-release versions | |
are not returned. | |
:return: A :class:`Distribution` instance, or ``None`` if no such | |
distribution could be located. | |
""" | |
result = None | |
r = parse_requirement(requirement) | |
if r is None: | |
raise DistlibException('Not a valid requirement: %r' % requirement) | |
scheme = get_scheme(self.scheme) | |
self.matcher = matcher = scheme.matcher(r.requirement) | |
logger.debug('matcher: %s (%s)', matcher, type(matcher).__name__) | |
versions = self.get_project(r.name) | |
if len(versions) > 2: # urls and digests keys are present | |
# sometimes, versions are invalid | |
slist = [] | |
vcls = matcher.version_class | |
for k in versions: | |
if k in ('urls', 'digests'): | |
continue | |
try: | |
if not matcher.match(k): | |
logger.debug('%s did not match %r', matcher, k) | |
else: | |
if prereleases or not vcls(k).is_prerelease: | |
slist.append(k) | |
else: | |
logger.debug('skipping pre-release ' | |
'version %s of %s', k, matcher.name) | |
except Exception: # pragma: no cover | |
logger.warning('error matching %s with %r', matcher, k) | |
pass # slist.append(k) | |
if len(slist) > 1: | |
slist = sorted(slist, key=scheme.key) | |
if slist: | |
logger.debug('sorted list: %s', slist) | |
version = slist[-1] | |
result = versions[version] | |
if result: | |
if r.extras: | |
result.extras = r.extras | |
result.download_urls = versions.get('urls', {}).get(version, set()) | |
d = {} | |
sd = versions.get('digests', {}) | |
for url in result.download_urls: | |
if url in sd: | |
d[url] = sd[url] | |
result.digests = d | |
self.matcher = None | |
return result | |
class PyPIRPCLocator(Locator): | |
""" | |
This locator uses XML-RPC to locate distributions. It therefore | |
cannot be used with simple mirrors (that only mirror file content). | |
""" | |
def __init__(self, url, **kwargs): | |
""" | |
Initialise an instance. | |
:param url: The URL to use for XML-RPC. | |
:param kwargs: Passed to the superclass constructor. | |
""" | |
super(PyPIRPCLocator, self).__init__(**kwargs) | |
self.base_url = url | |
self.client = ServerProxy(url, timeout=3.0) | |
def get_distribution_names(self): | |
""" | |
Return all the distribution names known to this locator. | |
""" | |
return set(self.client.list_packages()) | |
def _get_project(self, name): | |
result = {'urls': {}, 'digests': {}} | |
versions = self.client.package_releases(name, True) | |
for v in versions: | |
urls = self.client.release_urls(name, v) | |
data = self.client.release_data(name, v) | |
metadata = Metadata(scheme=self.scheme) | |
metadata.name = data['name'] | |
metadata.version = data['version'] | |
metadata.license = data.get('license') | |
metadata.keywords = data.get('keywords', []) | |
metadata.summary = data.get('summary') | |
dist = Distribution(metadata) | |
if urls: | |
info = urls[0] | |
metadata.source_url = info['url'] | |
dist.digest = self._get_digest(info) | |
dist.locator = self | |
result[v] = dist | |
for info in urls: | |
url = info['url'] | |
digest = self._get_digest(info) | |
result['urls'].setdefault(v, set()).add(url) | |
result['digests'][url] = digest | |
return result | |
class PyPIJSONLocator(Locator): | |
""" | |
This locator uses PyPI's JSON interface. It's very limited in functionality | |
and probably not worth using. | |
""" | |
def __init__(self, url, **kwargs): | |
super(PyPIJSONLocator, self).__init__(**kwargs) | |
self.base_url = ensure_slash(url) | |
def get_distribution_names(self): | |
""" | |
Return all the distribution names known to this locator. | |
""" | |
raise NotImplementedError('Not available from this locator') | |
def _get_project(self, name): | |
result = {'urls': {}, 'digests': {}} | |
url = urljoin(self.base_url, '%s/json' % quote(name)) | |
try: | |
resp = self.opener.open(url) | |
data = resp.read().decode() # for now | |
d = json.loads(data) | |
md = Metadata(scheme=self.scheme) | |
data = d['info'] | |
md.name = data['name'] | |
md.version = data['version'] | |
md.license = data.get('license') | |
md.keywords = data.get('keywords', []) | |
md.summary = data.get('summary') | |
dist = Distribution(md) | |
dist.locator = self | |
urls = d['urls'] | |
result[md.version] = dist | |
for info in d['urls']: | |
url = info['url'] | |
dist.download_urls.add(url) | |
dist.digests[url] = self._get_digest(info) | |
result['urls'].setdefault(md.version, set()).add(url) | |
result['digests'][url] = self._get_digest(info) | |
# Now get other releases | |
for version, infos in d['releases'].items(): | |
if version == md.version: | |
continue # already done | |
omd = Metadata(scheme=self.scheme) | |
omd.name = md.name | |
omd.version = version | |
odist = Distribution(omd) | |
odist.locator = self | |
result[version] = odist | |
for info in infos: | |
url = info['url'] | |
odist.download_urls.add(url) | |
odist.digests[url] = self._get_digest(info) | |
result['urls'].setdefault(version, set()).add(url) | |
result['digests'][url] = self._get_digest(info) | |
# for info in urls: | |
# md.source_url = info['url'] | |
# dist.digest = self._get_digest(info) | |
# dist.locator = self | |
# for info in urls: | |
# url = info['url'] | |
# result['urls'].setdefault(md.version, set()).add(url) | |
# result['digests'][url] = self._get_digest(info) | |
except Exception as e: | |
self.errors.put(text_type(e)) | |
logger.exception('JSON fetch failed: %s', e) | |
return result | |
class Page(object): | |
""" | |
This class represents a scraped HTML page. | |
""" | |
# The following slightly hairy-looking regex just looks for the contents of | |
# an anchor link, which has an attribute "href" either immediately preceded | |
# or immediately followed by a "rel" attribute. The attribute values can be | |
# declared with double quotes, single quotes or no quotes - which leads to | |
# the length of the expression. | |
_href = re.compile(""" | |
(rel\s*=\s*(?:"(?P<rel1>[^"]*)"|'(?P<rel2>[^']*)'|(?P<rel3>[^>\s\n]*))\s+)? | |
href\s*=\s*(?:"(?P<url1>[^"]*)"|'(?P<url2>[^']*)'|(?P<url3>[^>\s\n]*)) | |
(\s+rel\s*=\s*(?:"(?P<rel4>[^"]*)"|'(?P<rel5>[^']*)'|(?P<rel6>[^>\s\n]*)))? | |
""", re.I | re.S | re.X) | |
_base = re.compile(r"""<base\s+href\s*=\s*['"]?([^'">]+)""", re.I | re.S) | |
def __init__(self, data, url): | |
""" | |
Initialise an instance with the Unicode page contents and the URL they | |
came from. | |
""" | |
self.data = data | |
self.base_url = self.url = url | |
m = self._base.search(self.data) | |
if m: | |
self.base_url = m.group(1) | |
_clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I) | |
@cached_property | |
def links(self): | |
""" | |
Return the URLs of all the links on a page together with information | |
about their "rel" attribute, for determining which ones to treat as | |
downloads and which ones to queue for further scraping. | |
""" | |
def clean(url): | |
"Tidy up an URL." | |
scheme, netloc, path, params, query, frag = urlparse(url) | |
return urlunparse((scheme, netloc, quote(path), | |
params, query, frag)) | |
result = set() | |
for match in self._href.finditer(self.data): | |
d = match.groupdict('') | |
rel = (d['rel1'] or d['rel2'] or d['rel3'] or | |
d['rel4'] or d['rel5'] or d['rel6']) | |
url = d['url1'] or d['url2'] or d['url3'] | |
url = urljoin(self.base_url, url) | |
url = unescape(url) | |
url = self._clean_re.sub(lambda m: '%%%2x' % ord(m.group(0)), url) | |
result.add((url, rel)) | |
# We sort the result, hoping to bring the most recent versions | |
# to the front | |
result = sorted(result, key=lambda t: t[0], reverse=True) | |
return result | |
class SimpleScrapingLocator(Locator): | |
""" | |
A locator which scrapes HTML pages to locate downloads for a distribution. | |
This runs multiple threads to do the I/O; performance is at least as good | |
as pip's PackageFinder, which works in an analogous fashion. | |
""" | |
# These are used to deal with various Content-Encoding schemes. | |
decoders = { | |
'deflate': zlib.decompress, | |
'gzip': lambda b: gzip.GzipFile(fileobj=BytesIO(b)).read(), | |
'none': lambda b: b, | |
} | |
def __init__(self, url, timeout=None, num_workers=10, **kwargs): | |
""" | |
Initialise an instance. | |
:param url: The root URL to use for scraping. | |
:param timeout: The timeout, in seconds, to be applied to requests. | |
This defaults to ``None`` (no timeout specified). | |
:param num_workers: The number of worker threads you want to do I/O. | |
This defaults to 10. | |
:param kwargs: Passed to the superclass. | |
""" | |
super(SimpleScrapingLocator, self).__init__(**kwargs) | |
self.base_url = ensure_slash(url) | |
self.timeout = timeout | |
self._page_cache = {} | |
self._seen = set() | |
self._to_fetch = queue.Queue() | |
self._bad_hosts = set() | |
self.skip_externals = False | |
self.num_workers = num_workers | |
self._lock = threading.RLock() | |
# See issue #45: we need to be resilient when the locator is used | |
# in a thread, e.g. with concurrent.futures. We can't use self._lock | |
# as it is for coordinating our internal threads - the ones created | |
# in _prepare_threads. | |
self._gplock = threading.RLock() | |
def _prepare_threads(self): | |
""" | |
Threads are created only when get_project is called, and terminate | |
before it returns. They are there primarily to parallelise I/O (i.e. | |
fetching web pages). | |
""" | |
self._threads = [] | |
for i in range(self.num_workers): | |
t = threading.Thread(target=self._fetch) | |
t.setDaemon(True) | |
t.start() | |
self._threads.append(t) | |
def _wait_threads(self): | |
""" | |
Tell all the threads to terminate (by sending a sentinel value) and | |
wait for them to do so. | |
""" | |
# Note that you need two loops, since you can't say which | |
# thread will get each sentinel | |
for t in self._threads: | |
self._to_fetch.put(None) # sentinel | |
for t in self._threads: | |
t.join() | |
self._threads = [] | |
def _get_project(self, name): | |
result = {'urls': {}, 'digests': {}} | |
with self._gplock: | |
self.result = result | |
self.project_name = name | |
url = urljoin(self.base_url, '%s/' % quote(name)) | |
self._seen.clear() | |
self._page_cache.clear() | |
self._prepare_threads() | |
try: | |
logger.debug('Queueing %s', url) | |
self._to_fetch.put(url) | |
self._to_fetch.join() | |
finally: | |
self._wait_threads() | |
del self.result | |
return result | |
platform_dependent = re.compile(r'\b(linux-(i\d86|x86_64|arm\w+)|' | |
r'win(32|-amd64)|macosx-?\d+)\b', re.I) | |
def _is_platform_dependent(self, url): | |
""" | |
Does an URL refer to a platform-specific download? | |
""" | |
return self.platform_dependent.search(url) | |
def _process_download(self, url): | |
""" | |
See if an URL is a suitable download for a project. | |
If it is, register information in the result dictionary (for | |
_get_project) about the specific version it's for. | |
Note that the return value isn't actually used other than as a boolean | |
value. | |
""" | |
if self._is_platform_dependent(url): | |
info = None | |
else: | |
info = self.convert_url_to_download_info(url, self.project_name) | |
logger.debug('process_download: %s -> %s', url, info) | |
if info: | |
with self._lock: # needed because self.result is shared | |
self._update_version_data(self.result, info) | |
return info | |
def _should_queue(self, link, referrer, rel): | |
""" | |
Determine whether a link URL from a referring page and with a | |
particular "rel" attribute should be queued for scraping. | |
""" | |
scheme, netloc, path, _, _, _ = urlparse(link) | |
if path.endswith(self.source_extensions + self.binary_extensions + | |
self.excluded_extensions): | |
result = False | |
elif self.skip_externals and not link.startswith(self.base_url): | |
result = False | |
elif not referrer.startswith(self.base_url): | |
result = False | |
elif rel not in ('homepage', 'download'): | |
result = False | |
elif scheme not in ('http', 'https', 'ftp'): | |
result = False | |
elif self._is_platform_dependent(link): | |
result = False | |
else: | |
host = netloc.split(':', 1)[0] | |
if host.lower() == 'localhost': | |
result = False | |
else: | |
result = True | |
logger.debug('should_queue: %s (%s) from %s -> %s', link, rel, | |
referrer, result) | |
return result | |
def _fetch(self): | |
""" | |
Get a URL to fetch from the work queue, get the HTML page, examine its | |
links for download candidates and candidates for further scraping. | |
This is a handy method to run in a thread. | |
""" | |
while True: | |
url = self._to_fetch.get() | |
try: | |
if url: | |
page = self.get_page(url) | |
if page is None: # e.g. after an error | |
continue | |
for link, rel in page.links: | |
if link not in self._seen: | |
self._seen.add(link) | |
if (not self._process_download(link) and | |
self._should_queue(link, url, rel)): | |
logger.debug('Queueing %s from %s', link, url) | |
self._to_fetch.put(link) | |
except Exception as e: # pragma: no cover | |
self.errors.put(text_type(e)) | |
finally: | |
# always do this, to avoid hangs :-) | |
self._to_fetch.task_done() | |
if not url: | |
#logger.debug('Sentinel seen, quitting.') | |
break | |
def get_page(self, url): | |
""" | |
Get the HTML for an URL, possibly from an in-memory cache. | |
XXX TODO Note: this cache is never actually cleared. It's assumed that | |
the data won't get stale over the lifetime of a locator instance (not | |
necessarily true for the default_locator). | |
""" | |
# http://peak.telecommunity.com/DevCenter/EasyInstall#package-index-api | |
scheme, netloc, path, _, _, _ = urlparse(url) | |
if scheme == 'file' and os.path.isdir(url2pathname(path)): | |
url = urljoin(ensure_slash(url), 'index.html') | |
if url in self._page_cache: | |
result = self._page_cache[url] | |
logger.debug('Returning %s from cache: %s', url, result) | |
else: | |
host = netloc.split(':', 1)[0] | |
result = None | |
if host in self._bad_hosts: | |
logger.debug('Skipping %s due to bad host %s', url, host) | |
else: | |
req = Request(url, headers={'Accept-encoding': 'identity'}) | |
try: | |
logger.debug('Fetching %s', url) | |
resp = self.opener.open(req, timeout=self.timeout) | |
logger.debug('Fetched %s', url) | |
headers = resp.info() | |
content_type = headers.get('Content-Type', '') | |
if HTML_CONTENT_TYPE.match(content_type): | |
final_url = resp.geturl() | |
data = resp.read() | |
encoding = headers.get('Content-Encoding') | |
if encoding: | |
decoder = self.decoders[encoding] # fail if not found | |
data = decoder(data) | |
encoding = 'utf-8' | |
m = CHARSET.search(content_type) | |
if m: | |
encoding = m.group(1) | |
try: | |
data = data.decode(encoding) | |
except UnicodeError: # pragma: no cover | |
data = data.decode('latin-1') # fallback | |
result = Page(data, final_url) | |
self._page_cache[final_url] = result | |
except HTTPError as e: | |
if e.code != 404: | |
logger.exception('Fetch failed: %s: %s', url, e) | |
except URLError as e: # pragma: no cover | |
logger.exception('Fetch failed: %s: %s', url, e) | |
with self._lock: | |
self._bad_hosts.add(host) | |
except Exception as e: # pragma: no cover | |
logger.exception('Fetch failed: %s: %s', url, e) | |
finally: | |
self._page_cache[url] = result # even if None (failure) | |
return result | |
_distname_re = re.compile('<a href=[^>]*>([^<]+)<') | |
def get_distribution_names(self): | |
""" | |
Return all the distribution names known to this locator. | |
""" | |
result = set() | |
page = self.get_page(self.base_url) | |
if not page: | |
raise DistlibException('Unable to get %s' % self.base_url) | |
for match in self._distname_re.finditer(page.data): | |
result.add(match.group(1)) | |
return result | |
class DirectoryLocator(Locator): | |
""" | |
This class locates distributions in a directory tree. | |
""" | |
def __init__(self, path, **kwargs): | |
""" | |
Initialise an instance. | |
:param path: The root of the directory tree to search. | |
:param kwargs: Passed to the superclass constructor, | |
except for: | |
* recursive - if True (the default), subdirectories are | |
recursed into. If False, only the top-level directory | |
is searched. | |
""" | |
self.recursive = kwargs.pop('recursive', True) | |
super(DirectoryLocator, self).__init__(**kwargs) | |
path = os.path.abspath(path) | |
if not os.path.isdir(path): # pragma: no cover | |
raise DistlibException('Not a directory: %r' % path) | |
self.base_dir = path | |
def should_include(self, filename, parent): | |
""" | |
Should a filename be considered as a candidate for a distribution | |
archive? As well as the filename, the directory which contains it | |
is provided, though not used by the current implementation. | |
""" | |
return filename.endswith(self.downloadable_extensions) | |
def _get_project(self, name): | |
result = {'urls': {}, 'digests': {}} | |
for root, dirs, files in os.walk(self.base_dir): | |
for fn in files: | |
if self.should_include(fn, root): | |
fn = os.path.join(root, fn) | |
url = urlunparse(('file', '', | |
pathname2url(os.path.abspath(fn)), | |
'', '', '')) | |
info = self.convert_url_to_download_info(url, name) | |
if info: | |
self._update_version_data(result, info) | |
if not self.recursive: | |
break | |
return result | |
def get_distribution_names(self): | |
""" | |
Return all the distribution names known to this locator. | |
""" | |
result = set() | |
for root, dirs, files in os.walk(self.base_dir): | |
for fn in files: | |
if self.should_include(fn, root): | |
fn = os.path.join(root, fn) | |
url = urlunparse(('file', '', | |
pathname2url(os.path.abspath(fn)), | |
'', '', '')) | |
info = self.convert_url_to_download_info(url, None) | |
if info: | |
result.add(info['name']) | |
if not self.recursive: | |
break | |
return result | |
class JSONLocator(Locator): | |
""" | |
This locator uses special extended metadata (not available on PyPI) and is | |
the basis of performant dependency resolution in distlib. Other locators | |
require archive downloads before dependencies can be determined! As you | |
might imagine, that can be slow. | |
""" | |
def get_distribution_names(self): | |
""" | |
Return all the distribution names known to this locator. | |
""" | |
raise NotImplementedError('Not available from this locator') | |
def _get_project(self, name): | |
result = {'urls': {}, 'digests': {}} | |
data = get_project_data(name) | |
if data: | |
for info in data.get('files', []): | |
if info['ptype'] != 'sdist' or info['pyversion'] != 'source': | |
continue | |
# We don't store summary in project metadata as it makes | |
# the data bigger for no benefit during dependency | |
# resolution | |
dist = make_dist(data['name'], info['version'], | |
summary=data.get('summary', | |
'Placeholder for summary'), | |
scheme=self.scheme) | |
md = dist.metadata | |
md.source_url = info['url'] | |
# TODO SHA256 digest | |
if 'digest' in info and info['digest']: | |
dist.digest = ('md5', info['digest']) | |
md.dependencies = info.get('requirements', {}) | |
dist.exports = info.get('exports', {}) | |
result[dist.version] = dist | |
result['urls'].setdefault(dist.version, set()).add(info['url']) | |
return result | |
class DistPathLocator(Locator): | |
""" | |
This locator finds installed distributions in a path. It can be useful for | |
adding to an :class:`AggregatingLocator`. | |
""" | |
def __init__(self, distpath, **kwargs): | |
""" | |
Initialise an instance. | |
:param distpath: A :class:`DistributionPath` instance to search. | |
""" | |
super(DistPathLocator, self).__init__(**kwargs) | |
assert isinstance(distpath, DistributionPath) | |
self.distpath = distpath | |
def _get_project(self, name): | |
dist = self.distpath.get_distribution(name) | |
if dist is None: | |
result = {'urls': {}, 'digests': {}} | |
else: | |
result = { | |
dist.version: dist, | |
'urls': {dist.version: set([dist.source_url])}, | |
'digests': {dist.version: set([None])} | |
} | |
return result | |
class AggregatingLocator(Locator): | |
""" | |
This class allows you to chain and/or merge a list of locators. | |
""" | |
def __init__(self, *locators, **kwargs): | |
""" | |
Initialise an instance. | |
:param locators: The list of locators to search. | |
:param kwargs: Passed to the superclass constructor, | |
except for: | |
* merge - if False (the default), the first successful | |
search from any of the locators is returned. If True, | |
the results from all locators are merged (this can be | |
slow). | |
""" | |
self.merge = kwargs.pop('merge', False) | |
self.locators = locators | |
super(AggregatingLocator, self).__init__(**kwargs) | |
def clear_cache(self): | |
super(AggregatingLocator, self).clear_cache() | |
for locator in self.locators: | |
locator.clear_cache() | |
def _set_scheme(self, value): | |
self._scheme = value | |
for locator in self.locators: | |
locator.scheme = value | |
scheme = property(Locator.scheme.fget, _set_scheme) | |
def _get_project(self, name): | |
result = {} | |
for locator in self.locators: | |
d = locator.get_project(name) | |
if d: | |
if self.merge: | |
files = result.get('urls', {}) | |
digests = result.get('digests', {}) | |
# next line could overwrite result['urls'], result['digests'] | |
result.update(d) | |
df = result.get('urls') | |
if files and df: | |
for k, v in files.items(): | |
if k in df: | |
df[k] |= v | |
else: | |
df[k] = v | |
dd = result.get('digests') | |
if digests and dd: | |
dd.update(digests) | |
else: | |
# See issue #18. If any dists are found and we're looking | |
# for specific constraints, we only return something if | |
# a match is found. For example, if a DirectoryLocator | |
# returns just foo (1.0) while we're looking for | |
# foo (>= 2.0), we'll pretend there was nothing there so | |
# that subsequent locators can be queried. Otherwise we | |
# would just return foo (1.0) which would then lead to a | |
# failure to find foo (>= 2.0), because other locators | |
# weren't searched. Note that this only matters when | |
# merge=False. | |
if self.matcher is None: | |
found = True | |
else: | |
found = False | |
for k in d: | |
if self.matcher.match(k): | |
found = True | |
break | |
if found: | |
result = d | |
break | |
return result | |
def get_distribution_names(self): | |
""" | |
Return all the distribution names known to this locator. | |
""" | |
result = set() | |
for locator in self.locators: | |
try: | |
result |= locator.get_distribution_names() | |
except NotImplementedError: | |
pass | |
return result | |
# We use a legacy scheme simply because most of the dists on PyPI use legacy | |
# versions which don't conform to PEP 426 / PEP 440. | |
default_locator = AggregatingLocator( | |
JSONLocator(), | |
SimpleScrapingLocator('https://pypi.python.org/simple/', | |
timeout=3.0), | |
scheme='legacy') | |
locate = default_locator.locate | |
NAME_VERSION_RE = re.compile(r'(?P<name>[\w-]+)\s*' | |
r'\(\s*(==\s*)?(?P<ver>[^)]+)\)$') | |
class DependencyFinder(object): | |
""" | |
Locate dependencies for distributions. | |
""" | |
def __init__(self, locator=None): | |
""" | |
Initialise an instance, using the specified locator | |
to locate distributions. | |
""" | |
self.locator = locator or default_locator | |
self.scheme = get_scheme(self.locator.scheme) | |
def add_distribution(self, dist): | |
""" | |
Add a distribution to the finder. This will update internal information | |
about who provides what. | |
:param dist: The distribution to add. | |
""" | |
logger.debug('adding distribution %s', dist) | |
name = dist.key | |
self.dists_by_name[name] = dist | |
self.dists[(name, dist.version)] = dist | |
for p in dist.provides: | |
name, version = parse_name_and_version(p) | |
logger.debug('Add to provided: %s, %s, %s', name, version, dist) | |
self.provided.setdefault(name, set()).add((version, dist)) | |
def remove_distribution(self, dist): | |
""" | |
Remove a distribution from the finder. This will update internal | |
information about who provides what. | |
:param dist: The distribution to remove. | |
""" | |
logger.debug('removing distribution %s', dist) | |
name = dist.key | |
del self.dists_by_name[name] | |
del self.dists[(name, dist.version)] | |
for p in dist.provides: | |
name, version = parse_name_and_version(p) | |
logger.debug('Remove from provided: %s, %s, %s', name, version, dist) | |
s = self.provided[name] | |
s.remove((version, dist)) | |
if not s: | |
del self.provided[name] | |
def get_matcher(self, reqt): | |
""" | |
Get a version matcher for a requirement. | |
:param reqt: The requirement | |
:type reqt: str | |
:return: A version matcher (an instance of | |
:class:`distlib.version.Matcher`). | |
""" | |
try: | |
matcher = self.scheme.matcher(reqt) | |
except UnsupportedVersionError: # pragma: no cover | |
# XXX compat-mode if cannot read the version | |
name = reqt.split()[0] | |
matcher = self.scheme.matcher(name) | |
return matcher | |
def find_providers(self, reqt): | |
""" | |
Find the distributions which can fulfill a requirement. | |
:param reqt: The requirement. | |
:type reqt: str | |
:return: A set of distributions which can fulfill the requirement. | |
""" | |
matcher = self.get_matcher(reqt) | |
name = matcher.key # case-insensitive | |
result = set() | |
provided = self.provided | |
if name in provided: | |
for version, provider in provided[name]: | |
try: | |
match = matcher.match(version) | |
except UnsupportedVersionError: | |
match = False | |
if match: | |
result.add(provider) | |
break | |
return result | |
def try_to_replace(self, provider, other, problems): | |
""" | |
Attempt to replace one provider with another. This is typically used | |
when resolving dependencies from multiple sources, e.g. A requires | |
(B >= 1.0) while C requires (B >= 1.1). | |
For successful replacement, ``provider`` must meet all the requirements | |
which ``other`` fulfills. | |
:param provider: The provider we are trying to replace with. | |
:param other: The provider we're trying to replace. | |
:param problems: If False is returned, this will contain what | |
problems prevented replacement. This is currently | |
a tuple of the literal string 'cantreplace', | |
``provider``, ``other`` and the set of requirements | |
that ``provider`` couldn't fulfill. | |
:return: True if we can replace ``other`` with ``provider``, else | |
False. | |
""" | |
rlist = self.reqts[other] | |
unmatched = set() | |
for s in rlist: | |
matcher = self.get_matcher(s) | |
if not matcher.match(provider.version): | |
unmatched.add(s) | |
if unmatched: | |
# can't replace other with provider | |
problems.add(('cantreplace', provider, other, | |
frozenset(unmatched))) | |
result = False | |
else: | |
# can replace other with provider | |
self.remove_distribution(other) | |
del self.reqts[other] | |
for s in rlist: | |
self.reqts.setdefault(provider, set()).add(s) | |
self.add_distribution(provider) | |
result = True | |
return result | |
def find(self, requirement, meta_extras=None, prereleases=False): | |
""" | |
Find a distribution and all distributions it depends on. | |
:param requirement: The requirement specifying the distribution to | |
find, or a Distribution instance. | |
:param meta_extras: A list of meta extras such as :test:, :build: and | |
so on. | |
:param prereleases: If ``True``, allow pre-release versions to be | |
returned - otherwise, don't return prereleases | |
unless they're all that's available. | |
Return a set of :class:`Distribution` instances and a set of | |
problems. | |
The distributions returned should be such that they have the | |
:attr:`requested` attribute set to ``True`` if they were | |
from the ``requirement`` passed to ``find()``, and they have the | |
:attr:`build_time_dependency` attribute set to ``True`` unless they | |
are post-installation dependencies of the ``requirement``. | |
Each problem is a tuple consisting of the string | |
``'unsatisfied'`` and the requirement which couldn't be satisfied | |
by any distribution known to the locator. | |
""" | |
self.provided = {} | |
self.dists = {} | |
self.dists_by_name = {} | |
self.reqts = {} | |
meta_extras = set(meta_extras or []) | |
if ':*:' in meta_extras: | |
meta_extras.remove(':*:') | |
# :meta: and :run: are implicitly included | |
meta_extras |= set([':test:', ':build:', ':dev:']) | |
if isinstance(requirement, Distribution): | |
dist = odist = requirement | |
logger.debug('passed %s as requirement', odist) | |
else: | |
dist = odist = self.locator.locate(requirement, | |
prereleases=prereleases) | |
if dist is None: | |
raise DistlibException('Unable to locate %r' % requirement) | |
logger.debug('located %s', odist) | |
dist.requested = True | |
problems = set() | |
todo = set([dist]) | |
install_dists = set([odist]) | |
while todo: | |
dist = todo.pop() | |
name = dist.key # case-insensitive | |
if name not in self.dists_by_name: | |
self.add_distribution(dist) | |
else: | |
#import pdb; pdb.set_trace() | |
other = self.dists_by_name[name] | |
if other != dist: | |
self.try_to_replace(dist, other, problems) | |
ireqts = dist.run_requires | dist.meta_requires | |
sreqts = dist.build_requires | |
ereqts = set() | |
if dist in install_dists: | |
for key in ('test', 'build', 'dev'): | |
e = ':%s:' % key | |
if e in meta_extras: | |
ereqts |= getattr(dist, '%s_requires' % key) | |
all_reqts = ireqts | sreqts | ereqts | |
for r in all_reqts: | |
providers = self.find_providers(r) | |
if not providers: | |
logger.debug('No providers found for %r', r) | |
provider = self.locator.locate(r, prereleases=prereleases) | |
# If no provider is found and we didn't consider | |
# prereleases, consider them now. | |
if provider is None and not prereleases: | |
provider = self.locator.locate(r, prereleases=True) | |
if provider is None: | |
logger.debug('Cannot satisfy %r', r) | |
problems.add(('unsatisfied', r)) | |
else: | |
n, v = provider.key, provider.version | |
if (n, v) not in self.dists: | |
todo.add(provider) | |
providers.add(provider) | |
if r in ireqts and dist in install_dists: | |
install_dists.add(provider) | |
logger.debug('Adding %s to install_dists', | |
provider.name_and_version) | |
for p in providers: | |
name = p.key | |
if name not in self.dists_by_name: | |
self.reqts.setdefault(p, set()).add(r) | |
else: | |
other = self.dists_by_name[name] | |
if other != p: | |
# see if other can be replaced by p | |
self.try_to_replace(p, other, problems) | |
dists = set(self.dists.values()) | |
for dist in dists: | |
dist.build_time_dependency = dist not in install_dists | |
if dist.build_time_dependency: | |
logger.debug('%s is a build-time dependency only.', | |
dist.name_and_version) | |
logger.debug('find done for %s', odist) | |
return dists, problems |
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2012-2013 Python Software Foundation. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
""" | |
Class representing the list of files in a distribution. | |
Equivalent to distutils.filelist, but fixes some problems. | |
""" | |
import fnmatch | |
import logging | |
import os | |
import re | |
import sys | |
from . import DistlibException | |
from .compat import fsdecode | |
from .util import convert_path | |
__all__ = ['Manifest'] | |
logger = logging.getLogger(__name__) | |
# a \ followed by some spaces + EOL | |
_COLLAPSE_PATTERN = re.compile(r'\\\s*\n', re.M) | |
_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S) | |
# | |
# Due to the different results returned by fnmatch.translate, we need | |
# to do slightly different processing for Python 2.7 and 3.2 ... this needed | |
# to be brought in for Python 3.6 onwards. | |
# | |
_PYTHON_VERSION = sys.version_info[:2] | |
class Manifest(object): | |
"""A list of files built by on exploring the filesystem and filtered by | |
applying various patterns to what we find there. | |
""" | |
def __init__(self, base=None): | |
""" | |
Initialise an instance. | |
:param base: The base directory to explore under. | |
""" | |
self.base = os.path.abspath(os.path.normpath(base or os.getcwd())) | |
self.prefix = self.base + os.sep | |
self.allfiles = None | |
self.files = set() | |
# | |
# Public API | |
# | |
def findall(self): | |
"""Find all files under the base and set ``allfiles`` to the absolute | |
pathnames of files found. | |
""" | |
from stat import S_ISREG, S_ISDIR, S_ISLNK | |
self.allfiles = allfiles = [] | |
root = self.base | |
stack = [root] | |
pop = stack.pop | |
push = stack.append | |
while stack: | |
root = pop() | |
names = os.listdir(root) | |
for name in names: | |
fullname = os.path.join(root, name) | |
# Avoid excess stat calls -- just one will do, thank you! | |
stat = os.stat(fullname) | |
mode = stat.st_mode | |
if S_ISREG(mode): | |
allfiles.append(fsdecode(fullname)) | |
elif S_ISDIR(mode) and not S_ISLNK(mode): | |
push(fullname) | |
def add(self, item): | |
""" | |
Add a file to the manifest. | |
:param item: The pathname to add. This can be relative to the base. | |
""" | |
if not item.startswith(self.prefix): | |
item = os.path.join(self.base, item) | |
self.files.add(os.path.normpath(item)) | |
def add_many(self, items): | |
""" | |
Add a list of files to the manifest. | |
:param items: The pathnames to add. These can be relative to the base. | |
""" | |
for item in items: | |
self.add(item) | |
def sorted(self, wantdirs=False): | |
""" | |
Return sorted files in directory order | |
""" | |
def add_dir(dirs, d): | |
dirs.add(d) | |
logger.debug('add_dir added %s', d) | |
if d != self.base: | |
parent, _ = os.path.split(d) | |
assert parent not in ('', '/') | |
add_dir(dirs, parent) | |
result = set(self.files) # make a copy! | |
if wantdirs: | |
dirs = set() | |
for f in result: | |
add_dir(dirs, os.path.dirname(f)) | |
result |= dirs | |
return [os.path.join(*path_tuple) for path_tuple in | |
sorted(os.path.split(path) for path in result)] | |
def clear(self): | |
"""Clear all collected files.""" | |
self.files = set() | |
self.allfiles = [] | |
def process_directive(self, directive): | |
""" | |
Process a directive which either adds some files from ``allfiles`` to | |
``files``, or removes some files from ``files``. | |
:param directive: The directive to process. This should be in a format | |
compatible with distutils ``MANIFEST.in`` files: | |
http://docs.python.org/distutils/sourcedist.html#commands | |
""" | |
# Parse the line: split it up, make sure the right number of words | |
# is there, and return the relevant words. 'action' is always | |
# defined: it's the first word of the line. Which of the other | |
# three are defined depends on the action; it'll be either | |
# patterns, (dir and patterns), or (dirpattern). | |
action, patterns, thedir, dirpattern = self._parse_directive(directive) | |
# OK, now we know that the action is valid and we have the | |
# right number of words on the line for that action -- so we | |
# can proceed with minimal error-checking. | |
if action == 'include': | |
for pattern in patterns: | |
if not self._include_pattern(pattern, anchor=True): | |
logger.warning('no files found matching %r', pattern) | |
elif action == 'exclude': | |
for pattern in patterns: | |
found = self._exclude_pattern(pattern, anchor=True) | |
#if not found: | |
# logger.warning('no previously-included files ' | |
# 'found matching %r', pattern) | |
elif action == 'global-include': | |
for pattern in patterns: | |
if not self._include_pattern(pattern, anchor=False): | |
logger.warning('no files found matching %r ' | |
'anywhere in distribution', pattern) | |
elif action == 'global-exclude': | |
for pattern in patterns: | |
found = self._exclude_pattern(pattern, anchor=False) | |
#if not found: | |
# logger.warning('no previously-included files ' | |
# 'matching %r found anywhere in ' | |
# 'distribution', pattern) | |
elif action == 'recursive-include': | |
for pattern in patterns: | |
if not self._include_pattern(pattern, prefix=thedir): | |
logger.warning('no files found matching %r ' | |
'under directory %r', pattern, thedir) | |
elif action == 'recursive-exclude': | |
for pattern in patterns: | |
found = self._exclude_pattern(pattern, prefix=thedir) | |
#if not found: | |
# logger.warning('no previously-included files ' | |
# 'matching %r found under directory %r', | |
# pattern, thedir) | |
elif action == 'graft': | |
if not self._include_pattern(None, prefix=dirpattern): | |
logger.warning('no directories found matching %r', | |
dirpattern) | |
elif action == 'prune': | |
if not self._exclude_pattern(None, prefix=dirpattern): | |
logger.warning('no previously-included directories found ' | |
'matching %r', dirpattern) | |
else: # pragma: no cover | |
# This should never happen, as it should be caught in | |
# _parse_template_line | |
raise DistlibException( | |
'invalid action %r' % action) | |
# | |
# Private API | |
# | |
def _parse_directive(self, directive): | |
""" | |
Validate a directive. | |
:param directive: The directive to validate. | |
:return: A tuple of action, patterns, thedir, dir_patterns | |
""" | |
words = directive.split() | |
if len(words) == 1 and words[0] not in ('include', 'exclude', | |
'global-include', | |
'global-exclude', | |
'recursive-include', | |
'recursive-exclude', | |
'graft', 'prune'): | |
# no action given, let's use the default 'include' | |
words.insert(0, 'include') | |
action = words[0] | |
patterns = thedir = dir_pattern = None | |
if action in ('include', 'exclude', | |
'global-include', 'global-exclude'): | |
if len(words) < 2: | |
raise DistlibException( | |
'%r expects <pattern1> <pattern2> ...' % action) | |
patterns = [convert_path(word) for word in words[1:]] | |
elif action in ('recursive-include', 'recursive-exclude'): | |
if len(words) < 3: | |
raise DistlibException( | |
'%r expects <dir> <pattern1> <pattern2> ...' % action) | |
thedir = convert_path(words[1]) | |
patterns = [convert_path(word) for word in words[2:]] | |
elif action in ('graft', 'prune'): | |
if len(words) != 2: | |
raise DistlibException( | |
'%r expects a single <dir_pattern>' % action) | |
dir_pattern = convert_path(words[1]) | |
else: | |
raise DistlibException('unknown action %r' % action) | |
return action, patterns, thedir, dir_pattern | |
def _include_pattern(self, pattern, anchor=True, prefix=None, | |
is_regex=False): | |
"""Select strings (presumably filenames) from 'self.files' that | |
match 'pattern', a Unix-style wildcard (glob) pattern. | |
Patterns are not quite the same as implemented by the 'fnmatch' | |
module: '*' and '?' match non-special characters, where "special" | |
is platform-dependent: slash on Unix; colon, slash, and backslash on | |
DOS/Windows; and colon on Mac OS. | |
If 'anchor' is true (the default), then the pattern match is more | |
stringent: "*.py" will match "foo.py" but not "foo/bar.py". If | |
'anchor' is false, both of these will match. | |
If 'prefix' is supplied, then only filenames starting with 'prefix' | |
(itself a pattern) and ending with 'pattern', with anything in between | |
them, will match. 'anchor' is ignored in this case. | |
If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and | |
'pattern' is assumed to be either a string containing a regex or a | |
regex object -- no translation is done, the regex is just compiled | |
and used as-is. | |
Selected strings will be added to self.files. | |
Return True if files are found. | |
""" | |
# XXX docstring lying about what the special chars are? | |
found = False | |
pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex) | |
# delayed loading of allfiles list | |
if self.allfiles is None: | |
self.findall() | |
for name in self.allfiles: | |
if pattern_re.search(name): | |
self.files.add(name) | |
found = True | |
return found | |
def _exclude_pattern(self, pattern, anchor=True, prefix=None, | |
is_regex=False): | |
"""Remove strings (presumably filenames) from 'files' that match | |
'pattern'. | |
Other parameters are the same as for 'include_pattern()', above. | |
The list 'self.files' is modified in place. Return True if files are | |
found. | |
This API is public to allow, for example, exclusion of SCM subdirs when
packaging source distributions.
""" | |
found = False | |
pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex) | |
for f in list(self.files): | |
if pattern_re.search(f): | |
self.files.remove(f) | |
found = True | |
return found | |
def _translate_pattern(self, pattern, anchor=True, prefix=None, | |
is_regex=False): | |
"""Translate a shell-like wildcard pattern to a compiled regular | |
expression. | |
Return the compiled regex. If 'is_regex' true, | |
then 'pattern' is directly compiled to a regex (if it's a string) | |
or just returned as-is (assumes it's a regex object). | |
""" | |
if is_regex: | |
if isinstance(pattern, str): | |
return re.compile(pattern) | |
else: | |
return pattern | |
if _PYTHON_VERSION > (3, 2): | |
# ditch start and end characters | |
start, _, end = self._glob_to_re('_').partition('_') | |
if pattern: | |
pattern_re = self._glob_to_re(pattern) | |
if _PYTHON_VERSION > (3, 2): | |
assert pattern_re.startswith(start) and pattern_re.endswith(end) | |
else: | |
pattern_re = '' | |
base = re.escape(os.path.join(self.base, '')) | |
if prefix is not None: | |
# ditch end of pattern character | |
if _PYTHON_VERSION <= (3, 2): | |
empty_pattern = self._glob_to_re('') | |
prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)] | |
else: | |
prefix_re = self._glob_to_re(prefix) | |
assert prefix_re.startswith(start) and prefix_re.endswith(end) | |
prefix_re = prefix_re[len(start): len(prefix_re) - len(end)] | |
sep = os.sep | |
if os.sep == '\\': | |
sep = r'\\' | |
if _PYTHON_VERSION <= (3, 2): | |
pattern_re = '^' + base + sep.join((prefix_re, | |
'.*' + pattern_re)) | |
else: | |
pattern_re = pattern_re[len(start): len(pattern_re) - len(end)] | |
pattern_re = r'%s%s%s%s.*%s%s' % (start, base, prefix_re, sep, | |
pattern_re, end) | |
else: # no prefix -- respect anchor flag | |
if anchor: | |
if _PYTHON_VERSION <= (3, 2): | |
pattern_re = '^' + base + pattern_re | |
else: | |
pattern_re = r'%s%s%s' % (start, base, pattern_re[len(start):]) | |
return re.compile(pattern_re) | |
def _glob_to_re(self, pattern): | |
"""Translate a shell-like glob pattern to a regular expression. | |
Return a string containing the regex. Differs from | |
'fnmatch.translate()' in that '*' does not match "special characters" | |
(which are platform-specific). | |
""" | |
pattern_re = fnmatch.translate(pattern) | |
# '?' and '*' in the glob pattern become '.' and '.*' in the RE, which | |
# IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix, | |
# and by extension they shouldn't match such "special characters" under | |
# any OS. So change all non-escaped dots in the RE to match any | |
# character except the special characters (currently: just os.sep). | |
sep = os.sep | |
if os.sep == '\\': | |
# we're using a regex to manipulate a regex, so we need | |
# to escape the backslash twice | |
sep = r'\\\\' | |
escaped = r'\1[^%s]' % sep | |
pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re) | |
return pattern_re |
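# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original file). It assumes the class
# whose methods appear above is distlib.manifest.Manifest -- the method names
# and the MANIFEST.in-style directives match that class. Treat it as an
# illustration of the directive-processing API, not code shipped with the
# library; the file patterns are made up for the example.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from distlib.manifest import Manifest

    m = Manifest()                               # base defaults to the current directory
    # Directives use the distutils MANIFEST.in syntax handled by
    # process_directive() above; allfiles is populated lazily via findall().
    m.process_directive('include README.rst')
    m.process_directive('recursive-include src *.py')
    m.process_directive('global-exclude *.py[co]')
    m.process_directive('prune build')
    for path in m.sorted(wantdirs=False):        # files in directory-sorted order
        print(path)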
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2012-2013 Vinay Sajip. | |
# Licensed to the Python Software Foundation under a contributor agreement. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
"""Parser for the environment markers micro-language defined in PEP 345.""" | |
import ast | |
import os | |
import sys | |
import platform | |
from .compat import python_implementation, string_types | |
from .util import in_venv | |
__all__ = ['interpret'] | |
class Evaluator(object): | |
""" | |
A limited evaluator for Python expressions. | |
""" | |
operators = { | |
'eq': lambda x, y: x == y, | |
'gt': lambda x, y: x > y, | |
'gte': lambda x, y: x >= y, | |
'in': lambda x, y: x in y, | |
'lt': lambda x, y: x < y, | |
'lte': lambda x, y: x <= y, | |
'not': lambda x: not x, | |
'noteq': lambda x, y: x != y, | |
'notin': lambda x, y: x not in y, | |
} | |
allowed_values = { | |
'sys_platform': sys.platform, | |
'python_version': '%s.%s' % sys.version_info[:2], | |
# parsing sys.version is not reliable, but there is no other
# way to get e.g. 2.7.2+, and the PEP is defined with sys.version
'python_full_version': sys.version.split(' ', 1)[0], | |
'os_name': os.name, | |
'platform_in_venv': str(in_venv()), | |
'platform_release': platform.release(), | |
'platform_version': platform.version(), | |
'platform_machine': platform.machine(), | |
'platform_python_implementation': python_implementation(), | |
} | |
def __init__(self, context=None): | |
""" | |
Initialise an instance. | |
:param context: If specified, names are looked up in this mapping. | |
""" | |
self.context = context or {} | |
self.source = None | |
def get_fragment(self, offset): | |
""" | |
Get the part of the source which is causing a problem. | |
""" | |
fragment_len = 10 | |
s = '%r' % (self.source[offset:offset + fragment_len]) | |
if offset + fragment_len < len(self.source): | |
s += '...' | |
return s | |
def get_handler(self, node_type): | |
""" | |
Get a handler for the specified AST node type. | |
""" | |
return getattr(self, 'do_%s' % node_type, None) | |
def evaluate(self, node, filename=None): | |
""" | |
Evaluate a source string or node, using ``filename`` when | |
displaying errors. | |
""" | |
if isinstance(node, string_types): | |
self.source = node | |
kwargs = {'mode': 'eval'} | |
if filename: | |
kwargs['filename'] = filename | |
try: | |
node = ast.parse(node, **kwargs) | |
except SyntaxError as e: | |
s = self.get_fragment(e.offset) | |
raise SyntaxError('syntax error %s' % s) | |
node_type = node.__class__.__name__.lower() | |
handler = self.get_handler(node_type) | |
if handler is None: | |
if self.source is None: | |
s = '(source not available)' | |
else: | |
s = self.get_fragment(node.col_offset) | |
raise SyntaxError("don't know how to evaluate %r %s" % ( | |
node_type, s)) | |
return handler(node) | |
def get_attr_key(self, node): | |
assert isinstance(node, ast.Attribute), 'attribute node expected' | |
return '%s.%s' % (node.value.id, node.attr) | |
def do_attribute(self, node): | |
if not isinstance(node.value, ast.Name):
valid = False
key = '(unsupported attribute base)'  # keep the error message below well-defined
else: | |
key = self.get_attr_key(node) | |
valid = key in self.context or key in self.allowed_values | |
if not valid: | |
raise SyntaxError('invalid expression: %s' % key) | |
if key in self.context: | |
result = self.context[key] | |
else: | |
result = self.allowed_values[key] | |
return result | |
def do_boolop(self, node): | |
result = self.evaluate(node.values[0]) | |
is_or = node.op.__class__ is ast.Or | |
is_and = node.op.__class__ is ast.And | |
assert is_or or is_and | |
if (is_and and result) or (is_or and not result): | |
for n in node.values[1:]: | |
result = self.evaluate(n) | |
if (is_or and result) or (is_and and not result): | |
break | |
return result | |
def do_compare(self, node): | |
def sanity_check(lhsnode, rhsnode): | |
valid = True | |
if isinstance(lhsnode, ast.Str) and isinstance(rhsnode, ast.Str): | |
valid = False | |
#elif (isinstance(lhsnode, ast.Attribute) | |
# and isinstance(rhsnode, ast.Attribute)): | |
# klhs = self.get_attr_key(lhsnode) | |
# krhs = self.get_attr_key(rhsnode) | |
# valid = klhs != krhs | |
if not valid: | |
s = self.get_fragment(node.col_offset) | |
raise SyntaxError('Invalid comparison: %s' % s) | |
lhsnode = node.left | |
lhs = self.evaluate(lhsnode) | |
result = True | |
for op, rhsnode in zip(node.ops, node.comparators): | |
sanity_check(lhsnode, rhsnode) | |
op = op.__class__.__name__.lower() | |
if op not in self.operators: | |
raise SyntaxError('unsupported operation: %r' % op) | |
rhs = self.evaluate(rhsnode) | |
result = self.operators[op](lhs, rhs) | |
if not result: | |
break | |
lhs = rhs | |
lhsnode = rhsnode | |
return result | |
def do_expression(self, node): | |
return self.evaluate(node.body) | |
def do_name(self, node): | |
valid = False | |
if node.id in self.context: | |
valid = True | |
result = self.context[node.id] | |
elif node.id in self.allowed_values: | |
valid = True | |
result = self.allowed_values[node.id] | |
if not valid: | |
raise SyntaxError('invalid expression: %s' % node.id) | |
return result | |
def do_str(self, node): | |
return node.s | |
def interpret(marker, execution_context=None): | |
""" | |
Interpret a marker and return a result depending on environment. | |
:param marker: The marker to interpret. | |
:type marker: str | |
:param execution_context: The context used for name lookup. | |
:type execution_context: mapping | |
""" | |
return Evaluator(execution_context).evaluate(marker.strip()) |
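# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original file). The module above is an
# older distlib.markers; its Evaluator only handles ast.Str string literals,
# so this snippet assumes a Python version where ast.parse() still produces
# ast.Str nodes (i.e. before 3.8). Version comparisons here are plain string
# comparisons, as implied by the allowed_values table.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from distlib.markers import interpret

    # Evaluate markers against the running interpreter's environment.
    print(interpret("os_name == 'posix'"))                   # platform dependent
    print(interpret("python_version >= '2.7'"))              # string comparison!
    # Extra names can be supplied through the execution context mapping.
    print(interpret("extra == 'test'", {'extra': 'test'}))   # True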
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2012 The Python Software Foundation. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
"""Implementation of the Metadata for Python packages PEPs. | |
Supports all metadata formats (1.0, 1.1, 1.2, and 2.0 experimental). | |
""" | |
from __future__ import unicode_literals | |
import codecs | |
from email import message_from_file | |
import json | |
import logging | |
import re | |
from . import DistlibException, __version__ | |
from .compat import StringIO, string_types, text_type | |
from .markers import interpret | |
from .util import extract_by_key, get_extras | |
from .version import get_scheme, PEP440_VERSION_RE | |
logger = logging.getLogger(__name__) | |
class MetadataMissingError(DistlibException): | |
"""A required metadata is missing""" | |
class MetadataConflictError(DistlibException): | |
"""Attempt to read or write metadata fields that are conflictual.""" | |
class MetadataUnrecognizedVersionError(DistlibException): | |
"""Unknown metadata version number.""" | |
class MetadataInvalidError(DistlibException): | |
"""A metadata value is invalid""" | |
# public API of this module | |
__all__ = ['Metadata', 'PKG_INFO_ENCODING', 'PKG_INFO_PREFERRED_VERSION'] | |
# Encoding used for the PKG-INFO files | |
PKG_INFO_ENCODING = 'utf-8' | |
# preferred version. Hopefully will be changed | |
# to 1.2 once PEP 345 is supported everywhere | |
PKG_INFO_PREFERRED_VERSION = '1.1' | |
_LINE_PREFIX_1_2 = re.compile('\n \|') | |
_LINE_PREFIX_PRE_1_2 = re.compile('\n ') | |
_241_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', | |
'Summary', 'Description', | |
'Keywords', 'Home-page', 'Author', 'Author-email', | |
'License') | |
_314_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', | |
'Supported-Platform', 'Summary', 'Description', | |
'Keywords', 'Home-page', 'Author', 'Author-email', | |
'License', 'Classifier', 'Download-URL', 'Obsoletes', | |
'Provides', 'Requires') | |
_314_MARKERS = ('Obsoletes', 'Provides', 'Requires', 'Classifier', | |
'Download-URL') | |
_345_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', | |
'Supported-Platform', 'Summary', 'Description', | |
'Keywords', 'Home-page', 'Author', 'Author-email', | |
'Maintainer', 'Maintainer-email', 'License', | |
'Classifier', 'Download-URL', 'Obsoletes-Dist', | |
'Project-URL', 'Provides-Dist', 'Requires-Dist', | |
'Requires-Python', 'Requires-External') | |
_345_MARKERS = ('Provides-Dist', 'Requires-Dist', 'Requires-Python', | |
'Obsoletes-Dist', 'Requires-External', 'Maintainer', | |
'Maintainer-email', 'Project-URL') | |
_426_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', | |
'Supported-Platform', 'Summary', 'Description', | |
'Keywords', 'Home-page', 'Author', 'Author-email', | |
'Maintainer', 'Maintainer-email', 'License', | |
'Classifier', 'Download-URL', 'Obsoletes-Dist', | |
'Project-URL', 'Provides-Dist', 'Requires-Dist', | |
'Requires-Python', 'Requires-External', 'Private-Version', | |
'Obsoleted-By', 'Setup-Requires-Dist', 'Extension', | |
'Provides-Extra') | |
_426_MARKERS = ('Private-Version', 'Provides-Extra', 'Obsoleted-By', | |
'Setup-Requires-Dist', 'Extension') | |
_ALL_FIELDS = set() | |
_ALL_FIELDS.update(_241_FIELDS) | |
_ALL_FIELDS.update(_314_FIELDS) | |
_ALL_FIELDS.update(_345_FIELDS) | |
_ALL_FIELDS.update(_426_FIELDS) | |
EXTRA_RE = re.compile(r'''extra\s*==\s*("([^"]+)"|'([^']+)')''') | |
def _version2fieldlist(version): | |
if version == '1.0': | |
return _241_FIELDS | |
elif version == '1.1': | |
return _314_FIELDS | |
elif version == '1.2': | |
return _345_FIELDS | |
elif version == '2.0': | |
return _426_FIELDS | |
raise MetadataUnrecognizedVersionError(version) | |
def _best_version(fields): | |
"""Detect the best version depending on the fields used.""" | |
def _has_marker(keys, markers): | |
for marker in markers: | |
if marker in keys: | |
return True | |
return False | |
keys = [] | |
for key, value in fields.items(): | |
if value in ([], 'UNKNOWN', None): | |
continue | |
keys.append(key) | |
possible_versions = ['1.0', '1.1', '1.2', '2.0'] | |
# first let's try to see if a field is not part of one of the versions
for key in keys: | |
if key not in _241_FIELDS and '1.0' in possible_versions: | |
possible_versions.remove('1.0') | |
if key not in _314_FIELDS and '1.1' in possible_versions: | |
possible_versions.remove('1.1') | |
if key not in _345_FIELDS and '1.2' in possible_versions: | |
possible_versions.remove('1.2') | |
if key not in _426_FIELDS and '2.0' in possible_versions: | |
possible_versions.remove('2.0') | |
# possible_versions contains qualified versions
if len(possible_versions) == 1: | |
return possible_versions[0] # found ! | |
elif len(possible_versions) == 0: | |
raise MetadataConflictError('Unknown metadata set') | |
# let's see if one unique marker is found | |
is_1_1 = '1.1' in possible_versions and _has_marker(keys, _314_MARKERS) | |
is_1_2 = '1.2' in possible_versions and _has_marker(keys, _345_MARKERS) | |
is_2_0 = '2.0' in possible_versions and _has_marker(keys, _426_MARKERS) | |
if int(is_1_1) + int(is_1_2) + int(is_2_0) > 1: | |
raise MetadataConflictError('You used incompatible 1.1/1.2/2.0 fields') | |
# we have the choice, 1.0, or 1.2, or 2.0 | |
# - 1.0 has a broken Summary field but works with all tools | |
# - 1.1 is to avoid | |
# - 1.2 fixes Summary but has little adoption | |
# - 2.0 adds more features and is very new | |
if not is_1_1 and not is_1_2 and not is_2_0: | |
# we couldn't find any specific marker | |
if PKG_INFO_PREFERRED_VERSION in possible_versions: | |
return PKG_INFO_PREFERRED_VERSION | |
if is_1_1: | |
return '1.1' | |
if is_1_2: | |
return '1.2' | |
return '2.0' | |
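# Hedged illustration (not in the original source): examples of the version
# _best_version() would select, assuming the field tables defined above.
#   {'Name': 'x', 'Version': '1.0'}                          -> '1.1' (preferred default)
#   {'Name': 'x', 'Version': '1.0', 'Requires-Dist': ['y']}  -> '1.2' (1.2-only marker field)
#   {'Name': 'x', 'Version': '1.0', 'Provides-Extra': ['t']} -> '2.0' (only 2.0 accepts the field)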
_ATTR2FIELD = { | |
'metadata_version': 'Metadata-Version', | |
'name': 'Name', | |
'version': 'Version', | |
'platform': 'Platform', | |
'supported_platform': 'Supported-Platform', | |
'summary': 'Summary', | |
'description': 'Description', | |
'keywords': 'Keywords', | |
'home_page': 'Home-page', | |
'author': 'Author', | |
'author_email': 'Author-email', | |
'maintainer': 'Maintainer', | |
'maintainer_email': 'Maintainer-email', | |
'license': 'License', | |
'classifier': 'Classifier', | |
'download_url': 'Download-URL', | |
'obsoletes_dist': 'Obsoletes-Dist', | |
'provides_dist': 'Provides-Dist', | |
'requires_dist': 'Requires-Dist', | |
'setup_requires_dist': 'Setup-Requires-Dist', | |
'requires_python': 'Requires-Python', | |
'requires_external': 'Requires-External', | |
'requires': 'Requires', | |
'provides': 'Provides', | |
'obsoletes': 'Obsoletes', | |
'project_url': 'Project-URL', | |
'private_version': 'Private-Version', | |
'obsoleted_by': 'Obsoleted-By', | |
'extension': 'Extension', | |
'provides_extra': 'Provides-Extra', | |
} | |
_PREDICATE_FIELDS = ('Requires-Dist', 'Obsoletes-Dist', 'Provides-Dist') | |
_VERSIONS_FIELDS = ('Requires-Python',) | |
_VERSION_FIELDS = ('Version',) | |
_LISTFIELDS = ('Platform', 'Classifier', 'Obsoletes', | |
'Requires', 'Provides', 'Obsoletes-Dist', | |
'Provides-Dist', 'Requires-Dist', 'Requires-External', | |
'Project-URL', 'Supported-Platform', 'Setup-Requires-Dist', | |
'Provides-Extra', 'Extension') | |
_LISTTUPLEFIELDS = ('Project-URL',) | |
_ELEMENTSFIELD = ('Keywords',) | |
_UNICODEFIELDS = ('Author', 'Maintainer', 'Summary', 'Description') | |
_MISSING = object() | |
_FILESAFE = re.compile('[^A-Za-z0-9.]+') | |
def _get_name_and_version(name, version, for_filename=False): | |
"""Return the distribution name with version. | |
If for_filename is true, return a filename-escaped form.""" | |
if for_filename: | |
# For both name and version any runs of non-alphanumeric or '.' | |
# characters are replaced with a single '-'. Additionally any | |
# spaces in the version string become '.' | |
name = _FILESAFE.sub('-', name) | |
version = _FILESAFE.sub('-', version.replace(' ', '.')) | |
return '%s-%s' % (name, version) | |
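# Hedged illustration (not in the original source):
#   _get_name_and_version('my.dist', '1.0 beta', for_filename=True)
# yields 'my.dist-1.0.beta' -- spaces in the version become '.', and any other
# run of characters outside [A-Za-z0-9.] collapses to a single '-'.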
class LegacyMetadata(object): | |
"""The legacy metadata of a release. | |
Supports versions 1.0, 1.1 and 1.2 (auto-detected). You can | |
instantiate the class with one of these arguments (or none): | |
- *path*, the path to a metadata file
- *fileobj*, a file-like object with metadata as content
- *mapping*, a dict-like object
- *scheme*, a version scheme name
""" | |
# TODO document the mapping API and UNKNOWN default key | |
def __init__(self, path=None, fileobj=None, mapping=None, | |
scheme='default'): | |
if [path, fileobj, mapping].count(None) < 2: | |
raise TypeError('path, fileobj and mapping are exclusive') | |
self._fields = {} | |
self.requires_files = [] | |
self._dependencies = None | |
self.scheme = scheme | |
if path is not None: | |
self.read(path) | |
elif fileobj is not None: | |
self.read_file(fileobj) | |
elif mapping is not None: | |
self.update(mapping) | |
self.set_metadata_version() | |
def set_metadata_version(self): | |
self._fields['Metadata-Version'] = _best_version(self._fields) | |
def _write_field(self, fileobj, name, value): | |
fileobj.write('%s: %s\n' % (name, value)) | |
def __getitem__(self, name): | |
return self.get(name) | |
def __setitem__(self, name, value): | |
return self.set(name, value) | |
def __delitem__(self, name): | |
field_name = self._convert_name(name) | |
try: | |
del self._fields[field_name] | |
except KeyError: | |
raise KeyError(name) | |
def __contains__(self, name): | |
return (name in self._fields or | |
self._convert_name(name) in self._fields) | |
def _convert_name(self, name): | |
if name in _ALL_FIELDS: | |
return name | |
name = name.replace('-', '_').lower() | |
return _ATTR2FIELD.get(name, name) | |
def _default_value(self, name): | |
if name in _LISTFIELDS or name in _ELEMENTSFIELD: | |
return [] | |
return 'UNKNOWN' | |
def _remove_line_prefix(self, value): | |
if self.metadata_version in ('1.0', '1.1'): | |
return _LINE_PREFIX_PRE_1_2.sub('\n', value) | |
else: | |
return _LINE_PREFIX_1_2.sub('\n', value) | |
def __getattr__(self, name): | |
if name in _ATTR2FIELD: | |
return self[name] | |
raise AttributeError(name) | |
# | |
# Public API | |
# | |
# dependencies = property(_get_dependencies, _set_dependencies) | |
def get_fullname(self, filesafe=False): | |
"""Return the distribution name with version. | |
If filesafe is true, return a filename-escaped form.""" | |
return _get_name_and_version(self['Name'], self['Version'], filesafe) | |
def is_field(self, name): | |
"""return True if name is a valid metadata key""" | |
name = self._convert_name(name) | |
return name in _ALL_FIELDS | |
def is_multi_field(self, name): | |
name = self._convert_name(name) | |
return name in _LISTFIELDS | |
def read(self, filepath): | |
"""Read the metadata values from a file path.""" | |
fp = codecs.open(filepath, 'r', encoding='utf-8') | |
try: | |
self.read_file(fp) | |
finally: | |
fp.close() | |
def read_file(self, fileob): | |
"""Read the metadata values from a file object.""" | |
msg = message_from_file(fileob) | |
self._fields['Metadata-Version'] = msg['metadata-version'] | |
# When reading, get all the fields we can | |
for field in _ALL_FIELDS: | |
if field not in msg: | |
continue | |
if field in _LISTFIELDS: | |
# we can have multiple lines | |
values = msg.get_all(field) | |
if field in _LISTTUPLEFIELDS and values is not None: | |
values = [tuple(value.split(',')) for value in values] | |
self.set(field, values) | |
else: | |
# single line | |
value = msg[field] | |
if value is not None and value != 'UNKNOWN': | |
self.set(field, value) | |
self.set_metadata_version() | |
def write(self, filepath, skip_unknown=False): | |
"""Write the metadata fields to filepath.""" | |
fp = codecs.open(filepath, 'w', encoding='utf-8') | |
try: | |
self.write_file(fp, skip_unknown) | |
finally: | |
fp.close() | |
def write_file(self, fileobject, skip_unknown=False): | |
"""Write the PKG-INFO format data to a file object.""" | |
self.set_metadata_version() | |
for field in _version2fieldlist(self['Metadata-Version']): | |
values = self.get(field) | |
if skip_unknown and values in ('UNKNOWN', [], ['UNKNOWN']): | |
continue | |
if field in _ELEMENTSFIELD: | |
self._write_field(fileobject, field, ','.join(values)) | |
continue | |
if field not in _LISTFIELDS: | |
if field == 'Description': | |
if self.metadata_version in ('1.0', '1.1'): | |
values = values.replace('\n', '\n ') | |
else: | |
values = values.replace('\n', '\n |') | |
values = [values] | |
if field in _LISTTUPLEFIELDS: | |
values = [','.join(value) for value in values] | |
for value in values: | |
self._write_field(fileobject, field, value) | |
def update(self, other=None, **kwargs): | |
"""Set metadata values from the given iterable `other` and kwargs. | |
Behavior is like `dict.update`: If `other` has a ``keys`` method, | |
they are looped over and ``self[key]`` is assigned ``other[key]``. | |
Else, ``other`` is an iterable of ``(key, value)`` iterables. | |
Keys that don't match a metadata field or that have an empty value are | |
dropped. | |
""" | |
def _set(key, value): | |
if key in _ATTR2FIELD and value: | |
self.set(self._convert_name(key), value) | |
if not other: | |
# other is None or empty container | |
pass | |
elif hasattr(other, 'keys'): | |
for k in other.keys(): | |
_set(k, other[k]) | |
else: | |
for k, v in other: | |
_set(k, v) | |
if kwargs: | |
for k, v in kwargs.items(): | |
_set(k, v) | |
def set(self, name, value): | |
"""Control then set a metadata field.""" | |
name = self._convert_name(name) | |
if ((name in _ELEMENTSFIELD or name == 'Platform') and | |
not isinstance(value, (list, tuple))): | |
if isinstance(value, string_types): | |
value = [v.strip() for v in value.split(',')] | |
else: | |
value = [] | |
elif (name in _LISTFIELDS and | |
not isinstance(value, (list, tuple))): | |
if isinstance(value, string_types): | |
value = [value] | |
else: | |
value = [] | |
if logger.isEnabledFor(logging.WARNING): | |
project_name = self['Name'] | |
scheme = get_scheme(self.scheme) | |
if name in _PREDICATE_FIELDS and value is not None: | |
for v in value: | |
# check that the values are valid | |
if not scheme.is_valid_matcher(v.split(';')[0]): | |
logger.warning( | |
"'%s': '%s' is not valid (field '%s')", | |
project_name, v, name) | |
# FIXME this rejects UNKNOWN, is that right? | |
elif name in _VERSIONS_FIELDS and value is not None: | |
if not scheme.is_valid_constraint_list(value): | |
logger.warning("'%s': '%s' is not a valid version (field '%s')", | |
project_name, value, name) | |
elif name in _VERSION_FIELDS and value is not None: | |
if not scheme.is_valid_version(value): | |
logger.warning("'%s': '%s' is not a valid version (field '%s')", | |
project_name, value, name) | |
if name in _UNICODEFIELDS: | |
if name == 'Description': | |
value = self._remove_line_prefix(value) | |
self._fields[name] = value | |
def get(self, name, default=_MISSING): | |
"""Get a metadata field.""" | |
name = self._convert_name(name) | |
if name not in self._fields: | |
if default is _MISSING: | |
default = self._default_value(name) | |
return default | |
if name in _UNICODEFIELDS: | |
value = self._fields[name] | |
return value | |
elif name in _LISTFIELDS: | |
value = self._fields[name] | |
if value is None: | |
return [] | |
res = [] | |
for val in value: | |
if name not in _LISTTUPLEFIELDS: | |
res.append(val) | |
else: | |
# That's for Project-URL | |
res.append((val[0], val[1])) | |
return res | |
elif name in _ELEMENTSFIELD: | |
value = self._fields[name] | |
if isinstance(value, string_types): | |
return value.split(',') | |
return self._fields[name] | |
def check(self, strict=False): | |
"""Check if the metadata is compliant. If strict is True then raise if | |
no Name or Version are provided""" | |
self.set_metadata_version() | |
# XXX should check the versions (if the file was loaded) | |
missing, warnings = [], [] | |
for attr in ('Name', 'Version'): # required by PEP 345 | |
if attr not in self: | |
missing.append(attr) | |
if strict and missing != []: | |
msg = 'missing required metadata: %s' % ', '.join(missing) | |
raise MetadataMissingError(msg) | |
for attr in ('Home-page', 'Author'): | |
if attr not in self: | |
missing.append(attr) | |
# checking metadata 1.2 (XXX needs to check 1.1, 1.0) | |
if self['Metadata-Version'] != '1.2': | |
return missing, warnings | |
scheme = get_scheme(self.scheme) | |
def are_valid_constraints(value): | |
for v in value: | |
if not scheme.is_valid_matcher(v.split(';')[0]): | |
return False | |
return True | |
for fields, controller in ((_PREDICATE_FIELDS, are_valid_constraints), | |
(_VERSIONS_FIELDS, | |
scheme.is_valid_constraint_list), | |
(_VERSION_FIELDS, | |
scheme.is_valid_version)): | |
for field in fields: | |
value = self.get(field, None) | |
if value is not None and not controller(value): | |
warnings.append("Wrong value for '%s': %s" % (field, value)) | |
return missing, warnings | |
def todict(self, skip_missing=False): | |
"""Return fields as a dict. | |
Field names will be converted to use the underscore-lowercase style | |
instead of hyphen-mixed case (e.g. home_page instead of Home-page).
""" | |
self.set_metadata_version() | |
mapping_1_0 = ( | |
('metadata_version', 'Metadata-Version'), | |
('name', 'Name'), | |
('version', 'Version'), | |
('summary', 'Summary'), | |
('home_page', 'Home-page'), | |
('author', 'Author'), | |
('author_email', 'Author-email'), | |
('license', 'License'), | |
('description', 'Description'), | |
('keywords', 'Keywords'), | |
('platform', 'Platform'), | |
('classifiers', 'Classifier'), | |
('download_url', 'Download-URL'), | |
) | |
data = {} | |
for key, field_name in mapping_1_0: | |
if not skip_missing or field_name in self._fields: | |
data[key] = self[field_name] | |
if self['Metadata-Version'] == '1.2': | |
mapping_1_2 = ( | |
('requires_dist', 'Requires-Dist'), | |
('requires_python', 'Requires-Python'), | |
('requires_external', 'Requires-External'), | |
('provides_dist', 'Provides-Dist'), | |
('obsoletes_dist', 'Obsoletes-Dist'), | |
('project_url', 'Project-URL'), | |
('maintainer', 'Maintainer'), | |
('maintainer_email', 'Maintainer-email'), | |
) | |
for key, field_name in mapping_1_2: | |
if not skip_missing or field_name in self._fields: | |
if key != 'project_url': | |
data[key] = self[field_name] | |
else: | |
data[key] = [','.join(u) for u in self[field_name]] | |
elif self['Metadata-Version'] == '1.1': | |
mapping_1_1 = ( | |
('provides', 'Provides'), | |
('requires', 'Requires'), | |
('obsoletes', 'Obsoletes'), | |
) | |
for key, field_name in mapping_1_1: | |
if not skip_missing or field_name in self._fields: | |
data[key] = self[field_name] | |
return data | |
def add_requirements(self, requirements): | |
if self['Metadata-Version'] == '1.1': | |
# we can't have 1.1 metadata *and* Setuptools requires | |
for field in ('Obsoletes', 'Requires', 'Provides'): | |
if field in self: | |
del self[field] | |
self['Requires-Dist'] += requirements | |
# Mapping API | |
# TODO could add iter* variants | |
def keys(self): | |
return list(_version2fieldlist(self['Metadata-Version'])) | |
def __iter__(self): | |
for key in self.keys(): | |
yield key | |
def values(self): | |
return [self[key] for key in self.keys()] | |
def items(self): | |
return [(key, self[key]) for key in self.keys()] | |
def __repr__(self): | |
return '<%s %s %s>' % (self.__class__.__name__, self.name, | |
self.version) | |
METADATA_FILENAME = 'pydist.json' | |
WHEEL_METADATA_FILENAME = 'metadata.json' | |
class Metadata(object): | |
""" | |
The metadata of a release. This implementation uses 2.0 (JSON) | |
metadata where possible. If not possible, it wraps a LegacyMetadata | |
instance which handles the key-value metadata format. | |
""" | |
METADATA_VERSION_MATCHER = re.compile(r'^\d+(\.\d+)*$')
NAME_MATCHER = re.compile('^[0-9A-Z]([0-9A-Z_.-]*[0-9A-Z])?$', re.I) | |
VERSION_MATCHER = PEP440_VERSION_RE | |
SUMMARY_MATCHER = re.compile('.{1,2047}') | |
METADATA_VERSION = '2.0' | |
GENERATOR = 'distlib (%s)' % __version__ | |
MANDATORY_KEYS = { | |
'name': (), | |
'version': (), | |
'summary': ('legacy',), | |
} | |
INDEX_KEYS = ('name version license summary description author ' | |
'author_email keywords platform home_page classifiers ' | |
'download_url') | |
DEPENDENCY_KEYS = ('extras run_requires test_requires build_requires ' | |
'dev_requires provides meta_requires obsoleted_by ' | |
'supports_environments') | |
SYNTAX_VALIDATORS = { | |
'metadata_version': (METADATA_VERSION_MATCHER, ()), | |
'name': (NAME_MATCHER, ('legacy',)), | |
'version': (VERSION_MATCHER, ('legacy',)), | |
'summary': (SUMMARY_MATCHER, ('legacy',)), | |
} | |
__slots__ = ('_legacy', '_data', 'scheme') | |
def __init__(self, path=None, fileobj=None, mapping=None, | |
scheme='default'): | |
if [path, fileobj, mapping].count(None) < 2: | |
raise TypeError('path, fileobj and mapping are exclusive') | |
self._legacy = None | |
self._data = None | |
self.scheme = scheme | |
#import pdb; pdb.set_trace() | |
if mapping is not None: | |
try: | |
self._validate_mapping(mapping, scheme) | |
self._data = mapping | |
except MetadataUnrecognizedVersionError: | |
self._legacy = LegacyMetadata(mapping=mapping, scheme=scheme) | |
self.validate() | |
else: | |
data = None | |
if path: | |
with open(path, 'rb') as f: | |
data = f.read() | |
elif fileobj: | |
data = fileobj.read() | |
if data is None: | |
# Initialised with no args - to be added | |
self._data = { | |
'metadata_version': self.METADATA_VERSION, | |
'generator': self.GENERATOR, | |
} | |
else: | |
if not isinstance(data, text_type): | |
data = data.decode('utf-8') | |
try: | |
self._data = json.loads(data) | |
self._validate_mapping(self._data, scheme) | |
except ValueError: | |
# Note: MetadataUnrecognizedVersionError does not | |
# inherit from ValueError (it's a DistlibException, | |
# which should not inherit from ValueError). | |
# The ValueError comes from the json.load - if that | |
# succeeds and we get a validation error, we want | |
# that to propagate | |
self._legacy = LegacyMetadata(fileobj=StringIO(data), | |
scheme=scheme) | |
self.validate() | |
common_keys = set(('name', 'version', 'license', 'keywords', 'summary')) | |
none_list = (None, list) | |
none_dict = (None, dict) | |
mapped_keys = { | |
'run_requires': ('Requires-Dist', list), | |
'build_requires': ('Setup-Requires-Dist', list), | |
'dev_requires': none_list, | |
'test_requires': none_list, | |
'meta_requires': none_list, | |
'extras': ('Provides-Extra', list), | |
'modules': none_list, | |
'namespaces': none_list, | |
'exports': none_dict, | |
'commands': none_dict, | |
'classifiers': ('Classifier', list), | |
'source_url': ('Download-URL', None), | |
'metadata_version': ('Metadata-Version', None), | |
} | |
del none_list, none_dict | |
def __getattribute__(self, key): | |
common = object.__getattribute__(self, 'common_keys') | |
mapped = object.__getattribute__(self, 'mapped_keys') | |
if key in mapped: | |
lk, maker = mapped[key] | |
if self._legacy: | |
if lk is None: | |
result = None if maker is None else maker() | |
else: | |
result = self._legacy.get(lk) | |
else: | |
value = None if maker is None else maker() | |
if key not in ('commands', 'exports', 'modules', 'namespaces', | |
'classifiers'): | |
result = self._data.get(key, value) | |
else: | |
# special cases for PEP 459 | |
sentinel = object() | |
result = sentinel | |
d = self._data.get('extensions') | |
if d: | |
if key == 'commands': | |
result = d.get('python.commands', value) | |
elif key == 'classifiers': | |
d = d.get('python.details') | |
if d: | |
result = d.get(key, value) | |
else: | |
d = d.get('python.exports') | |
if not d: | |
d = self._data.get('python.exports') | |
if d: | |
result = d.get(key, value) | |
if result is sentinel: | |
result = value | |
elif key not in common: | |
result = object.__getattribute__(self, key) | |
elif self._legacy: | |
result = self._legacy.get(key) | |
else: | |
result = self._data.get(key) | |
return result | |
def _validate_value(self, key, value, scheme=None): | |
if key in self.SYNTAX_VALIDATORS: | |
pattern, exclusions = self.SYNTAX_VALIDATORS[key] | |
if (scheme or self.scheme) not in exclusions: | |
m = pattern.match(value) | |
if not m: | |
raise MetadataInvalidError("'%s' is an invalid value for " | |
"the '%s' property" % (value, | |
key)) | |
def __setattr__(self, key, value): | |
self._validate_value(key, value) | |
common = object.__getattribute__(self, 'common_keys') | |
mapped = object.__getattribute__(self, 'mapped_keys') | |
if key in mapped: | |
lk, _ = mapped[key] | |
if self._legacy: | |
if lk is None: | |
raise NotImplementedError | |
self._legacy[lk] = value | |
elif key not in ('commands', 'exports', 'modules', 'namespaces', | |
'classifiers'): | |
self._data[key] = value | |
else: | |
# special cases for PEP 459 | |
d = self._data.setdefault('extensions', {}) | |
if key == 'commands': | |
d['python.commands'] = value | |
elif key == 'classifiers': | |
d = d.setdefault('python.details', {}) | |
d[key] = value | |
else: | |
d = d.setdefault('python.exports', {}) | |
d[key] = value | |
elif key not in common: | |
object.__setattr__(self, key, value) | |
else: | |
if key == 'keywords': | |
if isinstance(value, string_types): | |
value = value.strip() | |
if value: | |
value = value.split() | |
else: | |
value = [] | |
if self._legacy: | |
self._legacy[key] = value | |
else: | |
self._data[key] = value | |
@property | |
def name_and_version(self): | |
return _get_name_and_version(self.name, self.version, True) | |
@property | |
def provides(self): | |
if self._legacy: | |
result = self._legacy['Provides-Dist'] | |
else: | |
result = self._data.setdefault('provides', []) | |
s = '%s (%s)' % (self.name, self.version) | |
if s not in result: | |
result.append(s) | |
return result | |
@provides.setter | |
def provides(self, value): | |
if self._legacy: | |
self._legacy['Provides-Dist'] = value | |
else: | |
self._data['provides'] = value | |
def get_requirements(self, reqts, extras=None, env=None): | |
""" | |
Base method to get dependencies, given a set of extras | |
to satisfy and an optional environment context. | |
:param reqts: A list of sometimes-wanted dependencies, | |
perhaps dependent on extras and environment. | |
:param extras: A list of optional components being requested. | |
:param env: An optional environment for marker evaluation. | |
""" | |
if self._legacy: | |
result = reqts | |
else: | |
result = [] | |
extras = get_extras(extras or [], self.extras) | |
for d in reqts: | |
if 'extra' not in d and 'environment' not in d: | |
# unconditional | |
include = True | |
else: | |
if 'extra' not in d: | |
# Not extra-dependent - only environment-dependent | |
include = True | |
else: | |
include = d.get('extra') in extras | |
if include: | |
# Not excluded because of extras, check environment | |
marker = d.get('environment') | |
if marker: | |
include = interpret(marker, env) | |
if include: | |
result.extend(d['requires']) | |
for key in ('build', 'dev', 'test'): | |
e = ':%s:' % key | |
if e in extras: | |
extras.remove(e) | |
# A recursive call, but it should terminate since 'test' | |
# has been removed from the extras | |
reqts = self._data.get('%s_requires' % key, []) | |
result.extend(self.get_requirements(reqts, extras=extras, | |
env=env)) | |
return result | |
@property | |
def dictionary(self): | |
if self._legacy: | |
return self._from_legacy() | |
return self._data | |
@property | |
def dependencies(self): | |
if self._legacy: | |
raise NotImplementedError | |
else: | |
return extract_by_key(self._data, self.DEPENDENCY_KEYS) | |
@dependencies.setter | |
def dependencies(self, value): | |
if self._legacy: | |
raise NotImplementedError | |
else: | |
self._data.update(value) | |
def _validate_mapping(self, mapping, scheme): | |
if mapping.get('metadata_version') != self.METADATA_VERSION: | |
raise MetadataUnrecognizedVersionError() | |
missing = [] | |
for key, exclusions in self.MANDATORY_KEYS.items(): | |
if key not in mapping: | |
if scheme not in exclusions: | |
missing.append(key) | |
if missing: | |
msg = 'Missing metadata items: %s' % ', '.join(missing) | |
raise MetadataMissingError(msg) | |
for k, v in mapping.items(): | |
self._validate_value(k, v, scheme) | |
def validate(self): | |
if self._legacy: | |
missing, warnings = self._legacy.check(True) | |
if missing or warnings: | |
logger.warning('Metadata: missing: %s, warnings: %s', | |
missing, warnings) | |
else: | |
self._validate_mapping(self._data, self.scheme) | |
def todict(self): | |
if self._legacy: | |
return self._legacy.todict(True) | |
else: | |
result = extract_by_key(self._data, self.INDEX_KEYS) | |
return result | |
def _from_legacy(self): | |
assert self._legacy and not self._data | |
result = { | |
'metadata_version': self.METADATA_VERSION, | |
'generator': self.GENERATOR, | |
} | |
lmd = self._legacy.todict(True) # skip missing ones | |
for k in ('name', 'version', 'license', 'summary', 'description', | |
'classifier'): | |
if k in lmd: | |
if k == 'classifier': | |
nk = 'classifiers' | |
else: | |
nk = k | |
result[nk] = lmd[k] | |
kw = lmd.get('Keywords', []) | |
if kw == ['']: | |
kw = [] | |
result['keywords'] = kw | |
keys = (('requires_dist', 'run_requires'), | |
('setup_requires_dist', 'build_requires')) | |
for ok, nk in keys: | |
if ok in lmd and lmd[ok]: | |
result[nk] = [{'requires': lmd[ok]}] | |
result['provides'] = self.provides | |
author = {} | |
maintainer = {} | |
return result | |
LEGACY_MAPPING = { | |
'name': 'Name', | |
'version': 'Version', | |
'license': 'License', | |
'summary': 'Summary', | |
'description': 'Description', | |
'classifiers': 'Classifier', | |
} | |
def _to_legacy(self): | |
def process_entries(entries): | |
reqts = set() | |
for e in entries: | |
extra = e.get('extra') | |
env = e.get('environment') | |
rlist = e['requires'] | |
for r in rlist: | |
if not env and not extra: | |
reqts.add(r) | |
else: | |
marker = '' | |
if extra: | |
marker = 'extra == "%s"' % extra | |
if env: | |
if marker: | |
marker = '(%s) and %s' % (env, marker) | |
else: | |
marker = env | |
reqts.add(';'.join((r, marker))) | |
return reqts | |
assert self._data and not self._legacy | |
result = LegacyMetadata() | |
nmd = self._data | |
for nk, ok in self.LEGACY_MAPPING.items(): | |
if nk in nmd: | |
result[ok] = nmd[nk] | |
r1 = process_entries(self.run_requires + self.meta_requires) | |
r2 = process_entries(self.build_requires + self.dev_requires) | |
if self.extras: | |
result['Provides-Extra'] = sorted(self.extras) | |
result['Requires-Dist'] = sorted(r1) | |
result['Setup-Requires-Dist'] = sorted(r2) | |
# TODO: other fields such as contacts | |
return result | |
def write(self, path=None, fileobj=None, legacy=False, skip_unknown=True): | |
if [path, fileobj].count(None) != 1: | |
raise ValueError('Exactly one of path and fileobj is needed') | |
self.validate() | |
if legacy: | |
if self._legacy: | |
legacy_md = self._legacy | |
else: | |
legacy_md = self._to_legacy() | |
if path: | |
legacy_md.write(path, skip_unknown=skip_unknown) | |
else: | |
legacy_md.write_file(fileobj, skip_unknown=skip_unknown) | |
else: | |
if self._legacy: | |
d = self._from_legacy() | |
else: | |
d = self._data | |
if fileobj: | |
json.dump(d, fileobj, ensure_ascii=True, indent=2, | |
sort_keys=True) | |
else: | |
with codecs.open(path, 'w', 'utf-8') as f: | |
json.dump(d, f, ensure_ascii=True, indent=2, | |
sort_keys=True) | |
def add_requirements(self, requirements): | |
if self._legacy: | |
self._legacy.add_requirements(requirements) | |
else: | |
run_requires = self._data.setdefault('run_requires', []) | |
always = None | |
for entry in run_requires: | |
if 'environment' not in entry and 'extra' not in entry: | |
always = entry | |
break | |
if always is None: | |
always = { 'requires': requirements } | |
run_requires.insert(0, always) | |
else: | |
rset = set(always['requires']) | set(requirements) | |
always['requires'] = sorted(rset) | |
def __repr__(self): | |
name = self.name or '(no name)' | |
version = self.version or 'no version' | |
return '<%s %s %s (%s)>' % (self.__class__.__name__, | |
self.metadata_version, name, version) |
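# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original file), assuming the classes
# above are distlib.metadata.Metadata / LegacyMetadata from this vendored
# copy. The project name, version and requirement strings are made up for
# illustration only.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from distlib.metadata import Metadata

    md = Metadata(mapping={
        'metadata_version': '2.0',        # must match METADATA_VERSION above
        'name': 'example-dist',
        'version': '0.1.0',
        'summary': 'An example distribution',
    })
    md.add_requirements(['requests (>=2.0)'])
    print(md.name, md.version)            # attribute access goes through __getattribute__
    print(md.run_requires)                # [{'requires': ['requests (>=2.0)']}]
    print(md.todict())                    # summary dict built from INDEX_KEYS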
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2013-2016 Vinay Sajip. | |
# Licensed to the Python Software Foundation under a contributor agreement. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
from __future__ import unicode_literals | |
import bisect | |
import io | |
import logging | |
import os | |
import pkgutil | |
import shutil | |
import sys | |
import types | |
import zipimport | |
from . import DistlibException | |
from .util import cached_property, get_cache_base, path_to_cache_dir, Cache | |
logger = logging.getLogger(__name__) | |
cache = None # created when needed | |
class ResourceCache(Cache): | |
def __init__(self, base=None): | |
if base is None: | |
# Use native string to avoid issues on 2.x: see Python #20140. | |
base = os.path.join(get_cache_base(), str('resource-cache')) | |
super(ResourceCache, self).__init__(base) | |
def is_stale(self, resource, path): | |
""" | |
Is the cache stale for the given resource? | |
:param resource: The :class:`Resource` being cached. | |
:param path: The path of the resource in the cache. | |
:return: True if the cache is stale. | |
""" | |
# Cache invalidation is a hard problem :-) | |
return True | |
def get(self, resource): | |
""" | |
Get a resource into the cache.
:param resource: A :class:`Resource` instance. | |
:return: The pathname of the resource in the cache. | |
""" | |
prefix, path = resource.finder.get_cache_info(resource) | |
if prefix is None: | |
result = path | |
else: | |
result = os.path.join(self.base, self.prefix_to_dir(prefix), path) | |
dirname = os.path.dirname(result) | |
if not os.path.isdir(dirname): | |
os.makedirs(dirname) | |
if not os.path.exists(result): | |
stale = True | |
else: | |
stale = self.is_stale(resource, path) | |
if stale: | |
# write the bytes of the resource to the cache location | |
with open(result, 'wb') as f: | |
f.write(resource.bytes) | |
return result | |
class ResourceBase(object): | |
def __init__(self, finder, name): | |
self.finder = finder | |
self.name = name | |
class Resource(ResourceBase): | |
""" | |
A class representing an in-package resource, such as a data file. This is | |
not normally instantiated by user code, but rather by a | |
:class:`ResourceFinder` which manages the resource. | |
""" | |
is_container = False # Backwards compatibility | |
def as_stream(self): | |
""" | |
Get the resource as a stream. | |
This is not a property to make it obvious that it returns a new stream | |
each time. | |
""" | |
return self.finder.get_stream(self) | |
@cached_property | |
def file_path(self): | |
global cache | |
if cache is None: | |
cache = ResourceCache() | |
return cache.get(self) | |
@cached_property | |
def bytes(self): | |
return self.finder.get_bytes(self) | |
@cached_property | |
def size(self): | |
return self.finder.get_size(self) | |
class ResourceContainer(ResourceBase): | |
is_container = True # Backwards compatibility | |
@cached_property | |
def resources(self): | |
return self.finder.get_resources(self) | |
class ResourceFinder(object): | |
""" | |
Resource finder for file system resources. | |
""" | |
if sys.platform.startswith('java'): | |
skipped_extensions = ('.pyc', '.pyo', '.class') | |
else: | |
skipped_extensions = ('.pyc', '.pyo') | |
def __init__(self, module): | |
self.module = module | |
self.loader = getattr(module, '__loader__', None) | |
self.base = os.path.dirname(getattr(module, '__file__', '')) | |
def _adjust_path(self, path): | |
return os.path.realpath(path) | |
def _make_path(self, resource_name): | |
# Issue #50: need to preserve type of path on Python 2.x | |
# like os.path._get_sep | |
if isinstance(resource_name, bytes): # should only happen on 2.x | |
sep = b'/' | |
else: | |
sep = '/' | |
parts = resource_name.split(sep) | |
parts.insert(0, self.base) | |
result = os.path.join(*parts) | |
return self._adjust_path(result) | |
def _find(self, path): | |
return os.path.exists(path) | |
def get_cache_info(self, resource): | |
return None, resource.path | |
def find(self, resource_name): | |
path = self._make_path(resource_name) | |
if not self._find(path): | |
result = None | |
else: | |
if self._is_directory(path): | |
result = ResourceContainer(self, resource_name) | |
else: | |
result = Resource(self, resource_name) | |
result.path = path | |
return result | |
def get_stream(self, resource): | |
return open(resource.path, 'rb') | |
def get_bytes(self, resource): | |
with open(resource.path, 'rb') as f: | |
return f.read() | |
def get_size(self, resource): | |
return os.path.getsize(resource.path) | |
def get_resources(self, resource): | |
def allowed(f): | |
return (f != '__pycache__' and not | |
f.endswith(self.skipped_extensions)) | |
return set([f for f in os.listdir(resource.path) if allowed(f)]) | |
def is_container(self, resource): | |
return self._is_directory(resource.path) | |
_is_directory = staticmethod(os.path.isdir) | |
def iterator(self, resource_name): | |
resource = self.find(resource_name) | |
if resource is not None: | |
todo = [resource] | |
while todo: | |
resource = todo.pop(0) | |
yield resource | |
if resource.is_container: | |
rname = resource.name | |
for name in resource.resources: | |
if not rname: | |
new_name = name | |
else: | |
new_name = '/'.join([rname, name]) | |
child = self.find(new_name) | |
if child.is_container: | |
todo.append(child) | |
else: | |
yield child | |
class ZipResourceFinder(ResourceFinder): | |
""" | |
Resource finder for resources in .zip files. | |
""" | |
def __init__(self, module): | |
super(ZipResourceFinder, self).__init__(module) | |
archive = self.loader.archive | |
self.prefix_len = 1 + len(archive) | |
# PyPy doesn't have a _files attr on zipimporter, and you can't set one | |
if hasattr(self.loader, '_files'): | |
self._files = self.loader._files | |
else: | |
self._files = zipimport._zip_directory_cache[archive] | |
self.index = sorted(self._files) | |
def _adjust_path(self, path): | |
return path | |
def _find(self, path): | |
path = path[self.prefix_len:] | |
if path in self._files: | |
result = True | |
else: | |
if path and path[-1] != os.sep: | |
path = path + os.sep | |
i = bisect.bisect(self.index, path) | |
try: | |
result = self.index[i].startswith(path) | |
except IndexError: | |
result = False | |
if not result: | |
logger.debug('_find failed: %r %r', path, self.loader.prefix) | |
else: | |
logger.debug('_find worked: %r %r', path, self.loader.prefix) | |
return result | |
def get_cache_info(self, resource): | |
prefix = self.loader.archive | |
path = resource.path[1 + len(prefix):] | |
return prefix, path | |
def get_bytes(self, resource): | |
return self.loader.get_data(resource.path) | |
def get_stream(self, resource): | |
return io.BytesIO(self.get_bytes(resource)) | |
def get_size(self, resource): | |
path = resource.path[self.prefix_len:] | |
return self._files[path][3] | |
def get_resources(self, resource): | |
path = resource.path[self.prefix_len:] | |
if path and path[-1] != os.sep: | |
path += os.sep | |
plen = len(path) | |
result = set() | |
i = bisect.bisect(self.index, path) | |
while i < len(self.index): | |
if not self.index[i].startswith(path): | |
break | |
s = self.index[i][plen:] | |
result.add(s.split(os.sep, 1)[0]) # only immediate children | |
i += 1 | |
return result | |
def _is_directory(self, path): | |
path = path[self.prefix_len:] | |
if path and path[-1] != os.sep: | |
path += os.sep | |
i = bisect.bisect(self.index, path) | |
try: | |
result = self.index[i].startswith(path) | |
except IndexError: | |
result = False | |
return result | |
_finder_registry = { | |
type(None): ResourceFinder, | |
zipimport.zipimporter: ZipResourceFinder | |
} | |
try: | |
# In Python 3.6, _frozen_importlib -> _frozen_importlib_external | |
try: | |
import _frozen_importlib_external as _fi | |
except ImportError: | |
import _frozen_importlib as _fi | |
_finder_registry[_fi.SourceFileLoader] = ResourceFinder | |
_finder_registry[_fi.FileFinder] = ResourceFinder | |
del _fi | |
except (ImportError, AttributeError): | |
pass | |
def register_finder(loader, finder_maker): | |
_finder_registry[type(loader)] = finder_maker | |
_finder_cache = {} | |
def finder(package): | |
""" | |
Return a resource finder for a package. | |
:param package: The name of the package. | |
:return: A :class:`ResourceFinder` instance for the package. | |
""" | |
if package in _finder_cache: | |
result = _finder_cache[package] | |
else: | |
if package not in sys.modules: | |
__import__(package) | |
module = sys.modules[package] | |
path = getattr(module, '__path__', None) | |
if path is None: | |
raise DistlibException('You cannot get a finder for a module, ' | |
'only for a package') | |
loader = getattr(module, '__loader__', None) | |
finder_maker = _finder_registry.get(type(loader)) | |
if finder_maker is None: | |
raise DistlibException('Unable to locate finder for %r' % package) | |
result = finder_maker(module) | |
_finder_cache[package] = result | |
return result | |
_dummy_module = types.ModuleType(str('__dummy__')) | |
def finder_for_path(path): | |
""" | |
Return a resource finder for a path, which should represent a container. | |
:param path: The path. | |
:return: A :class:`ResourceFinder` instance for the path. | |
""" | |
result = None | |
# calls any path hooks, gets importer into cache | |
pkgutil.get_importer(path) | |
loader = sys.path_importer_cache.get(path) | |
finder = _finder_registry.get(type(loader)) | |
if finder: | |
module = _dummy_module | |
module.__file__ = os.path.join(path, '') | |
module.__loader__ = loader | |
result = finder(module) | |
return result |
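# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original file), assuming the module
# above is importable as distlib.resources. The resource name 'data.txt' is
# hypothetical; find() simply returns None when a resource does not exist.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from distlib.resources import finder

    f = finder('distlib')                 # works for file-system and zip imports
    r = f.find('data.txt')                # hypothetical resource name
    if r is not None and not r.is_container:
        data = r.bytes                    # whole resource as bytes
        with r.as_stream() as stream:     # a *new* binary stream on each call
            head = stream.read(16)
        path = r.file_path                # materialised path (cached if needed)
        print(len(data), head, path)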
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2013-2015 Vinay Sajip. | |
# Licensed to the Python Software Foundation under a contributor agreement. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
from io import BytesIO | |
import logging | |
import os | |
import re | |
import struct | |
import sys | |
from .compat import sysconfig, detect_encoding, ZipFile | |
from .resources import finder | |
from .util import (FileOperator, get_export_entry, convert_path, | |
get_executable, in_venv) | |
logger = logging.getLogger(__name__) | |
_DEFAULT_MANIFEST = ''' | |
<?xml version="1.0" encoding="UTF-8" standalone="yes"?> | |
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0"> | |
<assemblyIdentity version="1.0.0.0" | |
processorArchitecture="X86" | |
name="%s" | |
type="win32"/> | |
<!-- Identify the application security requirements. --> | |
<trustInfo xmlns="urn:schemas-microsoft-com:asm.v3"> | |
<security> | |
<requestedPrivileges> | |
<requestedExecutionLevel level="asInvoker" uiAccess="false"/> | |
</requestedPrivileges> | |
</security> | |
</trustInfo> | |
</assembly>'''.strip() | |
# check if Python is called on the first line with this expression | |
FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$') | |
SCRIPT_TEMPLATE = '''# -*- coding: utf-8 -*- | |
if __name__ == '__main__': | |
import sys, re | |
def _resolve(module, func): | |
__import__(module) | |
mod = sys.modules[module] | |
parts = func.split('.') | |
result = getattr(mod, parts.pop(0)) | |
for p in parts: | |
result = getattr(result, p) | |
return result | |
try: | |
sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) | |
func = _resolve('%(module)s', '%(func)s') | |
rc = func() # None interpreted as 0 | |
except Exception as e: # only supporting Python >= 2.6 | |
sys.stderr.write('%%s\\n' %% e) | |
rc = 1 | |
sys.exit(rc) | |
''' | |
def _enquote_executable(executable): | |
if ' ' in executable: | |
# make sure we quote only the executable in case of env | |
# for example /usr/bin/env "/dir with spaces/bin/jython" | |
# instead of "/usr/bin/env /dir with spaces/bin/jython" | |
# otherwise the whole 'env ...' string would be quoted as one path
if executable.startswith('/usr/bin/env '): | |
env, _executable = executable.split(' ', 1) | |
if ' ' in _executable and not _executable.startswith('"'): | |
executable = '%s "%s"' % (env, _executable) | |
else: | |
if not executable.startswith('"'): | |
executable = '"%s"' % executable | |
return executable | |
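
# A small behaviour sketch for _enquote_executable(), assuming the definition
# above; the interpreter paths used here are made up for illustration.
def _demo_enquote_executable():
    # No spaces: returned unchanged.
    assert _enquote_executable('/usr/bin/python3') == '/usr/bin/python3'
    # Spaces in a plain path: the whole path is quoted.
    assert (_enquote_executable('/opt/my python/bin/python') ==
            '"/opt/my python/bin/python"')
    # /usr/bin/env prefix: only the target interpreter is quoted.
    assert (_enquote_executable('/usr/bin/env /dir with spaces/bin/jython') ==
            '/usr/bin/env "/dir with spaces/bin/jython"')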
class ScriptMaker(object): | |
""" | |
A class to copy or create scripts from source scripts or callable | |
specifications. | |
""" | |
script_template = SCRIPT_TEMPLATE | |
executable = None # for shebangs | |
def __init__(self, source_dir, target_dir, add_launchers=True, | |
dry_run=False, fileop=None): | |
self.source_dir = source_dir | |
self.target_dir = target_dir | |
self.add_launchers = add_launchers | |
self.force = False | |
self.clobber = False | |
# It only makes sense to set mode bits on POSIX. | |
self.set_mode = (os.name == 'posix') or (os.name == 'java' and | |
os._name == 'posix') | |
self.variants = set(('', 'X.Y')) | |
self._fileop = fileop or FileOperator(dry_run) | |
self._is_nt = os.name == 'nt' or ( | |
os.name == 'java' and os._name == 'nt') | |
def _get_alternate_executable(self, executable, options): | |
if options.get('gui', False) and self._is_nt: # pragma: no cover | |
dn, fn = os.path.split(executable) | |
fn = fn.replace('python', 'pythonw') | |
executable = os.path.join(dn, fn) | |
return executable | |
if sys.platform.startswith('java'): # pragma: no cover | |
def _is_shell(self, executable): | |
""" | |
Determine if the specified executable is a script | |
(contains a #! line) | |
""" | |
try: | |
with open(executable) as fp: | |
return fp.read(2) == '#!' | |
except (OSError, IOError): | |
logger.warning('Failed to open %s', executable) | |
return False | |
def _fix_jython_executable(self, executable): | |
if self._is_shell(executable): | |
# The workaround for Jython is not needed on Linux systems. | |
import java | |
if java.lang.System.getProperty('os.name') == 'Linux': | |
return executable | |
elif executable.lower().endswith('jython.exe'): | |
# Use wrapper exe for Jython on Windows | |
return executable | |
return '/usr/bin/env %s' % executable | |
def _get_shebang(self, encoding, post_interp=b'', options=None): | |
enquote = True | |
if self.executable: | |
executable = self.executable | |
enquote = False # assume this will be taken care of | |
elif not sysconfig.is_python_build(): | |
executable = get_executable() | |
elif in_venv(): # pragma: no cover | |
executable = os.path.join(sysconfig.get_path('scripts'), | |
'python%s' % sysconfig.get_config_var('EXE')) | |
else: # pragma: no cover | |
executable = os.path.join( | |
sysconfig.get_config_var('BINDIR'), | |
'python%s%s' % (sysconfig.get_config_var('VERSION'), | |
sysconfig.get_config_var('EXE'))) | |
if options: | |
executable = self._get_alternate_executable(executable, options) | |
if sys.platform.startswith('java'): # pragma: no cover | |
executable = self._fix_jython_executable(executable) | |
# Normalise case for Windows | |
executable = os.path.normcase(executable) | |
# If the user didn't specify an executable, it may be necessary to | |
# cater for executable paths with spaces (not uncommon on Windows) | |
if enquote: | |
executable = _enquote_executable(executable) | |
# Issue #51: don't use fsencode, since we later try to | |
# check that the shebang is decodable using utf-8. | |
executable = executable.encode('utf-8') | |
# in case of IronPython, play safe and enable frames support | |
if (sys.platform == 'cli' and '-X:Frames' not in post_interp | |
and '-X:FullFrames' not in post_interp): # pragma: no cover | |
post_interp += b' -X:Frames' | |
shebang = b'#!' + executable + post_interp + b'\n' | |
# Python parser starts to read a script using UTF-8 until | |
# it gets a #coding:xxx cookie. The shebang has to be the | |
# first line of a file, the #coding:xxx cookie cannot be | |
# written before. So the shebang has to be decodable from | |
# UTF-8. | |
try: | |
shebang.decode('utf-8') | |
except UnicodeDecodeError: # pragma: no cover | |
raise ValueError( | |
'The shebang (%r) is not decodable from utf-8' % shebang) | |
# If the script is encoded to a custom encoding (use a | |
# #coding:xxx cookie), the shebang has to be decodable from | |
# the script encoding too. | |
if encoding != 'utf-8': | |
try: | |
shebang.decode(encoding) | |
except UnicodeDecodeError: # pragma: no cover | |
raise ValueError( | |
'The shebang (%r) is not decodable ' | |
'from the script encoding (%r)' % (shebang, encoding)) | |
return shebang | |
def _get_script_text(self, entry): | |
return self.script_template % dict(module=entry.prefix, | |
func=entry.suffix) | |
manifest = _DEFAULT_MANIFEST | |
def get_manifest(self, exename): | |
base = os.path.basename(exename) | |
return self.manifest % base | |
def _write_script(self, names, shebang, script_bytes, filenames, ext): | |
use_launcher = self.add_launchers and self._is_nt | |
linesep = os.linesep.encode('utf-8') | |
if not use_launcher: | |
script_bytes = shebang + linesep + script_bytes | |
else: # pragma: no cover | |
if ext == 'py': | |
launcher = self._get_launcher('t') | |
else: | |
launcher = self._get_launcher('w') | |
stream = BytesIO() | |
with ZipFile(stream, 'w') as zf: | |
zf.writestr('__main__.py', script_bytes) | |
zip_data = stream.getvalue() | |
script_bytes = launcher + shebang + linesep + zip_data | |
for name in names: | |
outname = os.path.join(self.target_dir, name) | |
if use_launcher: # pragma: no cover | |
n, e = os.path.splitext(outname) | |
if e.startswith('.py'): | |
outname = n | |
outname = '%s.exe' % outname | |
try: | |
self._fileop.write_binary_file(outname, script_bytes) | |
except Exception: | |
# Failed writing an executable - it might be in use. | |
logger.warning('Failed to write executable - trying to ' | |
'use .deleteme logic') | |
dfname = '%s.deleteme' % outname | |
if os.path.exists(dfname): | |
os.remove(dfname) # Not allowed to fail here | |
os.rename(outname, dfname) # nor here | |
self._fileop.write_binary_file(outname, script_bytes) | |
logger.debug('Able to replace executable using ' | |
'.deleteme logic') | |
try: | |
os.remove(dfname) | |
except Exception: | |
pass # still in use - ignore error | |
else: | |
if self._is_nt and not outname.endswith('.' + ext): # pragma: no cover | |
outname = '%s.%s' % (outname, ext) | |
if os.path.exists(outname) and not self.clobber: | |
logger.warning('Skipping existing file %s', outname) | |
continue | |
self._fileop.write_binary_file(outname, script_bytes) | |
if self.set_mode: | |
self._fileop.set_executable_mode([outname]) | |
filenames.append(outname) | |
def _make_script(self, entry, filenames, options=None): | |
post_interp = b'' | |
if options: | |
args = options.get('interpreter_args', []) | |
if args: | |
args = ' %s' % ' '.join(args) | |
post_interp = args.encode('utf-8') | |
shebang = self._get_shebang('utf-8', post_interp, options=options) | |
script = self._get_script_text(entry).encode('utf-8') | |
name = entry.name | |
scriptnames = set() | |
if '' in self.variants: | |
scriptnames.add(name) | |
if 'X' in self.variants: | |
scriptnames.add('%s%s' % (name, sys.version[0])) | |
if 'X.Y' in self.variants: | |
scriptnames.add('%s-%s' % (name, sys.version[:3])) | |
if options and options.get('gui', False): | |
ext = 'pyw' | |
else: | |
ext = 'py' | |
self._write_script(scriptnames, shebang, script, filenames, ext) | |
def _copy_script(self, script, filenames): | |
adjust = False | |
script = os.path.join(self.source_dir, convert_path(script)) | |
outname = os.path.join(self.target_dir, os.path.basename(script)) | |
if not self.force and not self._fileop.newer(script, outname): | |
logger.debug('not copying %s (up-to-date)', script) | |
return | |
# Always open the file, but ignore failures in dry-run mode -- | |
# that way, we'll get accurate feedback if we can read the | |
# script. | |
try: | |
f = open(script, 'rb') | |
except IOError: # pragma: no cover | |
if not self.dry_run: | |
raise | |
f = None | |
else: | |
first_line = f.readline() | |
if not first_line: # pragma: no cover | |
logger.warning('%s: %s is an empty file (skipping)', | |
self.get_command_name(), script) | |
return | |
match = FIRST_LINE_RE.match(first_line.replace(b'\r\n', b'\n')) | |
if match: | |
adjust = True | |
post_interp = match.group(1) or b'' | |
if not adjust: | |
if f: | |
f.close() | |
self._fileop.copy_file(script, outname) | |
if self.set_mode: | |
self._fileop.set_executable_mode([outname]) | |
filenames.append(outname) | |
else: | |
logger.info('copying and adjusting %s -> %s', script, | |
self.target_dir) | |
if not self._fileop.dry_run: | |
encoding, lines = detect_encoding(f.readline) | |
f.seek(0) | |
shebang = self._get_shebang(encoding, post_interp) | |
if b'pythonw' in first_line: # pragma: no cover | |
ext = 'pyw' | |
else: | |
ext = 'py' | |
n = os.path.basename(outname) | |
self._write_script([n], shebang, f.read(), filenames, ext) | |
if f: | |
f.close() | |
@property | |
def dry_run(self): | |
return self._fileop.dry_run | |
@dry_run.setter | |
def dry_run(self, value): | |
self._fileop.dry_run = value | |
if os.name == 'nt' or (os.name == 'java' and os._name == 'nt'): # pragma: no cover | |
# Executable launcher support. | |
# Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/ | |
def _get_launcher(self, kind): | |
if struct.calcsize('P') == 8: # 64-bit | |
bits = '64' | |
else: | |
bits = '32' | |
name = '%s%s.exe' % (kind, bits) | |
# Issue 31: don't hardcode an absolute package name, but | |
# determine it relative to the current package | |
distlib_package = __name__.rsplit('.', 1)[0] | |
result = finder(distlib_package).find(name).bytes | |
return result | |
# Public API follows | |
def make(self, specification, options=None): | |
""" | |
Make a script. | |
:param specification: The specification, which is either a valid export | |
entry specification (to make a script from a | |
callable) or a filename (to make a script by | |
copying from a source location). | |
:param options: A dictionary of options controlling script generation. | |
:return: A list of all absolute pathnames written to. | |
""" | |
filenames = [] | |
entry = get_export_entry(specification) | |
if entry is None: | |
self._copy_script(specification, filenames) | |
else: | |
self._make_script(entry, filenames, options=options) | |
return filenames | |
def make_multiple(self, specifications, options=None): | |
""" | |
Take a list of specifications and make scripts from them. | |
:param specifications: A list of specifications. | |
:return: A list of all absolute pathnames written to. | |
""" | |
filenames = [] | |
for specification in specifications: | |
filenames.extend(self.make(specification, options)) | |
return filenames |
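
# A usage sketch for ScriptMaker, assuming the class above; the export entry
# 'hello = mypkg.cli:main' and the target directory are hypothetical names.
# On POSIX this writes an executable wrapper script; on Windows a launcher
# .exe would be produced instead.
def _demo_script_maker():
    import tempfile
    target = tempfile.mkdtemp()
    maker = ScriptMaker(source_dir=None, target_dir=target)
    maker.executable = '/usr/bin/python3'   # interpreter to use in the shebang
    maker.variants = set([''])              # only the plain 'hello' variant
    # Writes <target>/hello, a script that imports mypkg.cli and calls main().
    return maker.make('hello = mypkg.cli:main')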
# | |
# Copyright (C) 2012-2016 The Python Software Foundation. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
import codecs | |
from collections import deque | |
import contextlib | |
import csv | |
from glob import iglob as std_iglob | |
import io | |
import json | |
import logging | |
import os | |
import py_compile | |
import re | |
import shutil | |
import socket | |
try: | |
import ssl | |
except ImportError: # pragma: no cover | |
ssl = None | |
import subprocess | |
import sys | |
import tarfile | |
import tempfile | |
import textwrap | |
try: | |
import threading | |
except ImportError: # pragma: no cover | |
import dummy_threading as threading | |
import time | |
from . import DistlibException | |
from .compat import (string_types, text_type, shutil, raw_input, StringIO, | |
cache_from_source, urlopen, urljoin, httplib, xmlrpclib, | |
splittype, HTTPHandler, BaseConfigurator, valid_ident, | |
Container, configparser, URLError, ZipFile, fsdecode, | |
unquote) | |
logger = logging.getLogger(__name__) | |
# | |
# Requirement parsing code for name + optional constraints + optional extras | |
# | |
# e.g. 'foo [bar, baz] >= 1.2, < 2.0' (extras, if any, come before constraints) | |
# | |
# The regex can seem a bit hairy, so we build it up out of smaller pieces | |
# which are manageable. | |
# | |
COMMA = r'\s*,\s*' | |
COMMA_RE = re.compile(COMMA) | |
IDENT = r'(\w|[.-])+' | |
EXTRA_IDENT = r'(\*|:(\*|\w+):|' + IDENT + ')' | |
VERSPEC = IDENT + r'\*?' | |
RELOP = '([<>=!~]=)|[<>]' | |
# | |
# The first relop is optional - if absent, will be taken as '~=' | |
# | |
BARE_CONSTRAINTS = ('(' + RELOP + r')?\s*(' + VERSPEC + ')(' + COMMA + '(' + | |
RELOP + r')\s*(' + VERSPEC + '))*') | |
DIRECT_REF = r'(from\s+(?P<diref>.*))' | |
# | |
# Either the bare constraints or the bare constraints in parentheses | |
# | |
CONSTRAINTS = (r'\(\s*(?P<c1>' + BARE_CONSTRAINTS + '|' + DIRECT_REF + | |
r')\s*\)|(?P<c2>' + BARE_CONSTRAINTS + r'\s*)') | |
EXTRA_LIST = EXTRA_IDENT + '(' + COMMA + EXTRA_IDENT + ')*' | |
EXTRAS = r'\[\s*(?P<ex>' + EXTRA_LIST + r')?\s*\]' | |
REQUIREMENT = ('(?P<dn>' + IDENT + r')\s*(' + EXTRAS + r'\s*)?(\s*' + | |
CONSTRAINTS + ')?$') | |
REQUIREMENT_RE = re.compile(REQUIREMENT) | |
# | |
# Used to scan through the constraints | |
# | |
RELOP_IDENT = '(?P<op>' + RELOP + r')\s*(?P<vn>' + VERSPEC + ')' | |
RELOP_IDENT_RE = re.compile(RELOP_IDENT) | |
def parse_requirement(s): | |
def get_constraint(m): | |
d = m.groupdict() | |
return d['op'], d['vn'] | |
result = None | |
m = REQUIREMENT_RE.match(s) | |
if m: | |
d = m.groupdict() | |
name = d['dn'] | |
cons = d['c1'] or d['c2'] | |
if not d['diref']: | |
url = None | |
else: | |
# direct reference | |
cons = None | |
url = d['diref'].strip() | |
if not cons: | |
cons = None | |
constr = '' | |
rs = d['dn'] | |
else: | |
if cons[0] not in '<>!=': | |
cons = '~=' + cons | |
iterator = RELOP_IDENT_RE.finditer(cons) | |
cons = [get_constraint(m) for m in iterator] | |
rs = '%s (%s)' % (name, ', '.join(['%s %s' % con for con in cons])) | |
if not d['ex']: | |
extras = None | |
else: | |
extras = COMMA_RE.split(d['ex']) | |
result = Container(name=name, constraints=cons, extras=extras, | |
requirement=rs, source=s, url=url) | |
return result | |
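
# A quick sketch of what parse_requirement() returns, assuming the function
# above; note that extras, if present, come between the name and the
# constraints. The project name and extras are illustrative.
def _demo_parse_requirement():
    r = parse_requirement('foo [bar, baz] >= 1.2, < 2.0')
    assert r.name == 'foo'
    assert r.extras == ['bar', 'baz']
    assert r.constraints == [('>=', '1.2'), ('<', '2.0')]
    assert r.requirement == 'foo (>= 1.2, < 2.0)'
    assert r.url is None                    # no 'from <url>' direct reference
    return r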
def get_resources_dests(resources_root, rules): | |
"""Find destinations for resources files""" | |
def get_rel_path(base, path): | |
# normalizes and returns a lstripped-/-separated path | |
base = base.replace(os.path.sep, '/') | |
path = path.replace(os.path.sep, '/') | |
assert path.startswith(base) | |
return path[len(base):].lstrip('/') | |
destinations = {} | |
for base, suffix, dest in rules: | |
prefix = os.path.join(resources_root, base) | |
for abs_base in iglob(prefix): | |
abs_glob = os.path.join(abs_base, suffix) | |
for abs_path in iglob(abs_glob): | |
resource_file = get_rel_path(resources_root, abs_path) | |
if dest is None: # remove the entry if it was here | |
destinations.pop(resource_file, None) | |
else: | |
rel_path = get_rel_path(abs_base, abs_path) | |
rel_dest = dest.replace(os.path.sep, '/').rstrip('/') | |
destinations[resource_file] = rel_dest + '/' + rel_path | |
return destinations | |
def in_venv(): | |
if hasattr(sys, 'real_prefix'): | |
# virtualenv venvs | |
result = True | |
else: | |
# PEP 405 venvs | |
result = sys.prefix != getattr(sys, 'base_prefix', sys.prefix) | |
return result | |
def get_executable(): | |
# The __PYVENV_LAUNCHER__ dance is apparently no longer needed, as | |
# changes to the stub launcher mean that sys.executable always points | |
# to the stub on macOS | |
# if sys.platform == 'darwin' and ('__PYVENV_LAUNCHER__' | |
# in os.environ): | |
# result = os.environ['__PYVENV_LAUNCHER__'] | |
# else: | |
# result = sys.executable | |
# return result | |
result = os.path.normcase(sys.executable) | |
if not isinstance(result, text_type): | |
result = fsdecode(result) | |
return result | |
def proceed(prompt, allowed_chars, error_prompt=None, default=None): | |
p = prompt | |
while True: | |
s = raw_input(p) | |
p = prompt | |
if not s and default: | |
s = default | |
if s: | |
c = s[0].lower() | |
if c in allowed_chars: | |
break | |
if error_prompt: | |
p = '%c: %s\n%s' % (c, error_prompt, prompt) | |
return c | |
def extract_by_key(d, keys): | |
if isinstance(keys, string_types): | |
keys = keys.split() | |
result = {} | |
for key in keys: | |
if key in d: | |
result[key] = d[key] | |
return result | |
def read_exports(stream): | |
if sys.version_info[0] >= 3: | |
# needs to be a text stream | |
stream = codecs.getreader('utf-8')(stream) | |
# Try to load as JSON, falling back on legacy format | |
data = stream.read() | |
stream = StringIO(data) | |
try: | |
jdata = json.load(stream) | |
result = jdata['extensions']['python.exports']['exports'] | |
for group, entries in result.items(): | |
for k, v in entries.items(): | |
s = '%s = %s' % (k, v) | |
entry = get_export_entry(s) | |
assert entry is not None | |
entries[k] = entry | |
return result | |
except Exception: | |
stream.seek(0, 0) | |
def read_stream(cp, stream): | |
if hasattr(cp, 'read_file'): | |
cp.read_file(stream) | |
else: | |
cp.readfp(stream) | |
cp = configparser.ConfigParser() | |
try: | |
read_stream(cp, stream) | |
except configparser.MissingSectionHeaderError: | |
stream.close() | |
data = textwrap.dedent(data) | |
stream = StringIO(data) | |
read_stream(cp, stream) | |
result = {} | |
for key in cp.sections(): | |
result[key] = entries = {} | |
for name, value in cp.items(key): | |
s = '%s = %s' % (name, value) | |
entry = get_export_entry(s) | |
assert entry is not None | |
#entry.dist = self | |
entries[name] = entry | |
return result | |
def write_exports(exports, stream): | |
if sys.version_info[0] >= 3: | |
# needs to be a text stream | |
stream = codecs.getwriter('utf-8')(stream) | |
cp = configparser.ConfigParser() | |
for k, v in exports.items(): | |
# TODO check k, v for valid values | |
cp.add_section(k) | |
for entry in v.values(): | |
if entry.suffix is None: | |
s = entry.prefix | |
else: | |
s = '%s:%s' % (entry.prefix, entry.suffix) | |
if entry.flags: | |
s = '%s [%s]' % (s, ', '.join(entry.flags)) | |
cp.set(k, entry.name, s) | |
cp.write(stream) | |
@contextlib.contextmanager | |
def tempdir(): | |
td = tempfile.mkdtemp() | |
try: | |
yield td | |
finally: | |
shutil.rmtree(td) | |
@contextlib.contextmanager | |
def chdir(d): | |
cwd = os.getcwd() | |
try: | |
os.chdir(d) | |
yield | |
finally: | |
os.chdir(cwd) | |
@contextlib.contextmanager | |
def socket_timeout(seconds=15): | |
cto = socket.getdefaulttimeout() | |
try: | |
socket.setdefaulttimeout(seconds) | |
yield | |
finally: | |
socket.setdefaulttimeout(cto) | |
class cached_property(object): | |
def __init__(self, func): | |
self.func = func | |
#for attr in ('__name__', '__module__', '__doc__'): | |
# setattr(self, attr, getattr(func, attr, None)) | |
def __get__(self, obj, cls=None): | |
if obj is None: | |
return self | |
value = self.func(obj) | |
object.__setattr__(obj, self.func.__name__, value) | |
#obj.__dict__[self.func.__name__] = value = self.func(obj) | |
return value | |
def convert_path(pathname): | |
"""Return 'pathname' as a name that will work on the native filesystem. | |
The path is split on '/' and put back together again using the current | |
directory separator. Needed because filenames in the setup script are | |
always supplied in Unix style, and have to be converted to the local | |
convention before we can actually use them in the filesystem. Raises | |
ValueError on non-Unix-ish systems if 'pathname' either starts or | |
ends with a slash. | |
""" | |
if os.sep == '/': | |
return pathname | |
if not pathname: | |
return pathname | |
if pathname[0] == '/': | |
raise ValueError("path '%s' cannot be absolute" % pathname) | |
if pathname[-1] == '/': | |
raise ValueError("path '%s' cannot end with '/'" % pathname) | |
paths = pathname.split('/') | |
while os.curdir in paths: | |
paths.remove(os.curdir) | |
if not paths: | |
return os.curdir | |
return os.path.join(*paths) | |
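
# A small sketch of convert_path(), assuming the function above: setup-style
# paths are always written with '/' and are rejoined with the native
# separator of the running platform.
def _demo_convert_path():
    p = convert_path('pkg/data/file.txt')
    # On POSIX (os.sep == '/') the path is returned unchanged:
    #   'pkg/data/file.txt'
    # On Windows (os.sep == '\\') it becomes:
    #   'pkg\\data\\file.txt'
    return p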
class FileOperator(object): | |
def __init__(self, dry_run=False): | |
self.dry_run = dry_run | |
self.ensured = set() | |
self._init_record() | |
def _init_record(self): | |
self.record = False | |
self.files_written = set() | |
self.dirs_created = set() | |
def record_as_written(self, path): | |
if self.record: | |
self.files_written.add(path) | |
def newer(self, source, target): | |
"""Tell if the target is newer than the source. | |
Returns true if 'source' exists and is more recently modified than | |
'target', or if 'source' exists and 'target' doesn't. | |
Returns false if both exist and 'target' is the same age or younger | |
than 'source'. Raise DistlibException if 'source' does not exist. | |
Note that this test is not very accurate: files created in the same | |
second will have the same "age". | |
""" | |
if not os.path.exists(source): | |
raise DistlibException("file '%r' does not exist" % | |
os.path.abspath(source)) | |
if not os.path.exists(target): | |
return True | |
return os.stat(source).st_mtime > os.stat(target).st_mtime | |
def copy_file(self, infile, outfile, check=True): | |
"""Copy a file respecting dry-run and force flags. | |
""" | |
self.ensure_dir(os.path.dirname(outfile)) | |
logger.info('Copying %s to %s', infile, outfile) | |
if not self.dry_run: | |
msg = None | |
if check: | |
if os.path.islink(outfile): | |
msg = '%s is a symlink' % outfile | |
elif os.path.exists(outfile) and not os.path.isfile(outfile): | |
msg = '%s is a non-regular file' % outfile | |
if msg: | |
raise ValueError(msg + ' which would be overwritten') | |
shutil.copyfile(infile, outfile) | |
self.record_as_written(outfile) | |
def copy_stream(self, instream, outfile, encoding=None): | |
assert not os.path.isdir(outfile) | |
self.ensure_dir(os.path.dirname(outfile)) | |
logger.info('Copying stream %s to %s', instream, outfile) | |
if not self.dry_run: | |
if encoding is None: | |
outstream = open(outfile, 'wb') | |
else: | |
outstream = codecs.open(outfile, 'w', encoding=encoding) | |
try: | |
shutil.copyfileobj(instream, outstream) | |
finally: | |
outstream.close() | |
self.record_as_written(outfile) | |
def write_binary_file(self, path, data): | |
self.ensure_dir(os.path.dirname(path)) | |
if not self.dry_run: | |
with open(path, 'wb') as f: | |
f.write(data) | |
self.record_as_written(path) | |
def write_text_file(self, path, data, encoding): | |
self.ensure_dir(os.path.dirname(path)) | |
if not self.dry_run: | |
with open(path, 'wb') as f: | |
f.write(data.encode(encoding)) | |
self.record_as_written(path) | |
def set_mode(self, bits, mask, files): | |
if os.name == 'posix' or (os.name == 'java' and os._name == 'posix'): | |
# Set the executable bits (owner, group, and world) on | |
# all the files specified. | |
for f in files: | |
if self.dry_run: | |
logger.info("changing mode of %s", f) | |
else: | |
mode = (os.stat(f).st_mode | bits) & mask | |
logger.info("changing mode of %s to %o", f, mode) | |
os.chmod(f, mode) | |
set_executable_mode = lambda s, f: s.set_mode(0o555, 0o7777, f) | |
def ensure_dir(self, path): | |
path = os.path.abspath(path) | |
if path not in self.ensured and not os.path.exists(path): | |
self.ensured.add(path) | |
d, f = os.path.split(path) | |
self.ensure_dir(d) | |
logger.info('Creating %s' % path) | |
if not self.dry_run: | |
os.mkdir(path) | |
if self.record: | |
self.dirs_created.add(path) | |
def byte_compile(self, path, optimize=False, force=False, prefix=None): | |
dpath = cache_from_source(path, not optimize) | |
logger.info('Byte-compiling %s to %s', path, dpath) | |
if not self.dry_run: | |
if force or self.newer(path, dpath): | |
if not prefix: | |
diagpath = None | |
else: | |
assert path.startswith(prefix) | |
diagpath = path[len(prefix):] | |
py_compile.compile(path, dpath, diagpath, True) # raise error | |
self.record_as_written(dpath) | |
return dpath | |
def ensure_removed(self, path): | |
if os.path.exists(path): | |
if os.path.isdir(path) and not os.path.islink(path): | |
logger.debug('Removing directory tree at %s', path) | |
if not self.dry_run: | |
shutil.rmtree(path) | |
if self.record: | |
if path in self.dirs_created: | |
self.dirs_created.remove(path) | |
else: | |
if os.path.islink(path): | |
s = 'link' | |
else: | |
s = 'file' | |
logger.debug('Removing %s %s', s, path) | |
if not self.dry_run: | |
os.remove(path) | |
if self.record: | |
if path in self.files_written: | |
self.files_written.remove(path) | |
def is_writable(self, path): | |
result = False | |
while not result: | |
if os.path.exists(path): | |
result = os.access(path, os.W_OK) | |
break | |
parent = os.path.dirname(path) | |
if parent == path: | |
break | |
path = parent | |
return result | |
def commit(self): | |
""" | |
Commit recorded changes, turn off recording, return | |
changes. | |
""" | |
assert self.record | |
result = self.files_written, self.dirs_created | |
self._init_record() | |
return result | |
def rollback(self): | |
if not self.dry_run: | |
for f in list(self.files_written): | |
if os.path.exists(f): | |
os.remove(f) | |
# dirs should all be empty now, except perhaps for | |
# __pycache__ subdirs | |
# reverse so that subdirs appear before their parents | |
dirs = sorted(self.dirs_created, reverse=True) | |
for d in dirs: | |
flist = os.listdir(d) | |
if flist: | |
assert flist == ['__pycache__'] | |
sd = os.path.join(d, flist[0]) | |
os.rmdir(sd) | |
os.rmdir(d) # should fail if non-empty | |
self._init_record() | |
def resolve(module_name, dotted_path): | |
if module_name in sys.modules: | |
mod = sys.modules[module_name] | |
else: | |
mod = __import__(module_name) | |
if dotted_path is None: | |
result = mod | |
else: | |
parts = dotted_path.split('.') | |
result = getattr(mod, parts.pop(0)) | |
for p in parts: | |
result = getattr(result, p) | |
return result | |
class ExportEntry(object): | |
def __init__(self, name, prefix, suffix, flags): | |
self.name = name | |
self.prefix = prefix | |
self.suffix = suffix | |
self.flags = flags | |
@cached_property | |
def value(self): | |
return resolve(self.prefix, self.suffix) | |
def __repr__(self): # pragma: no cover | |
return '<ExportEntry %s = %s:%s %s>' % (self.name, self.prefix, | |
self.suffix, self.flags) | |
def __eq__(self, other): | |
if not isinstance(other, ExportEntry): | |
result = False | |
else: | |
result = (self.name == other.name and | |
self.prefix == other.prefix and | |
self.suffix == other.suffix and | |
self.flags == other.flags) | |
return result | |
__hash__ = object.__hash__ | |
ENTRY_RE = re.compile(r'''(?P<name>(\w|[-.+])+) | |
\s*=\s*(?P<callable>(\w+)([:\.]\w+)*) | |
\s*(\[\s*(?P<flags>\w+(=\w+)?(,\s*\w+(=\w+)?)*)\s*\])? | |
''', re.VERBOSE) | |
def get_export_entry(specification): | |
m = ENTRY_RE.search(specification) | |
if not m: | |
result = None | |
if '[' in specification or ']' in specification: | |
raise DistlibException("Invalid specification " | |
"'%s'" % specification) | |
else: | |
d = m.groupdict() | |
name = d['name'] | |
path = d['callable'] | |
colons = path.count(':') | |
if colons == 0: | |
prefix, suffix = path, None | |
else: | |
if colons != 1: | |
raise DistlibException("Invalid specification " | |
"'%s'" % specification) | |
prefix, suffix = path.split(':') | |
flags = d['flags'] | |
if flags is None: | |
if '[' in specification or ']' in specification: | |
raise DistlibException("Invalid specification " | |
"'%s'" % specification) | |
flags = [] | |
else: | |
flags = [f.strip() for f in flags.split(',')] | |
result = ExportEntry(name, prefix, suffix, flags) | |
return result | |
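
# A sketch of get_export_entry(), assuming the function above; the entry
# specification 'hello = mycli:main [gui]' uses made-up names.
def _demo_get_export_entry():
    e = get_export_entry('hello = mycli:main [gui]')
    assert e.name == 'hello'
    assert e.prefix == 'mycli'          # module to import
    assert e.suffix == 'main'           # attribute resolved on that module
    assert e.flags == ['gui']
    # e.value would import the (hypothetical) mycli module and return main.
    return e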
def get_cache_base(suffix=None): | |
""" | |
Return the default base location for distlib caches. If the directory does | |
not exist, it is created. Use the suffix provided for the base directory, | |
and default to '.distlib' if it isn't provided. | |
On Windows, if LOCALAPPDATA is defined in the environment, then it is | |
assumed to be a directory, and will be the parent directory of the result. | |
On POSIX, and on Windows if LOCALAPPDATA is not defined, the user's home | |
directory - using os.expanduser('~') - will be the parent directory of | |
the result. | |
The result is just the directory '.distlib' in the parent directory as | |
determined above, or with the name specified with ``suffix``. | |
""" | |
if suffix is None: | |
suffix = '.distlib' | |
if os.name == 'nt' and 'LOCALAPPDATA' in os.environ: | |
result = os.path.expandvars('$localappdata') | |
else: | |
# Assume posix, or old Windows | |
result = os.path.expanduser('~') | |
# we use 'isdir' instead of 'exists', because we want to | |
# fail if there's a file with that name | |
if os.path.isdir(result): | |
usable = os.access(result, os.W_OK) | |
if not usable: | |
logger.warning('Directory exists but is not writable: %s', result) | |
else: | |
try: | |
os.makedirs(result) | |
usable = True | |
except OSError: | |
logger.warning('Unable to create %s', result, exc_info=True) | |
usable = False | |
if not usable: | |
result = tempfile.mkdtemp() | |
logger.warning('Default location unusable, using %s', result) | |
return os.path.join(result, suffix) | |
def path_to_cache_dir(path): | |
""" | |
Convert an absolute path to a directory name for use in a cache. | |
The algorithm used is: | |
#. On Windows, any ``':'`` in the drive is replaced with ``'---'``. | |
#. Any occurrence of ``os.sep`` is replaced with ``'--'``. | |
#. ``'.cache'`` is appended. | |
""" | |
d, p = os.path.splitdrive(os.path.abspath(path)) | |
if d: | |
d = d.replace(':', '---') | |
p = p.replace(os.sep, '--') | |
return d + p + '.cache' | |
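
# A sketch of path_to_cache_dir(), assuming the function above; the input
# path is illustrative.
def _demo_path_to_cache_dir():
    # On POSIX:
    #   '/home/user/wheels/foo.whl' -> '--home--user--wheels--foo.whl.cache'
    # On Windows the drive colon additionally becomes '---' and each '\\'
    # becomes '--', before '.cache' is appended.
    return path_to_cache_dir('/home/user/wheels/foo.whl')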
def ensure_slash(s): | |
if not s.endswith('/'): | |
return s + '/' | |
return s | |
def parse_credentials(netloc): | |
username = password = None | |
if '@' in netloc: | |
prefix, netloc = netloc.split('@', 1) | |
if ':' not in prefix: | |
username = prefix | |
else: | |
username, password = prefix.split(':', 1) | |
return username, password, netloc | |
def get_process_umask(): | |
result = os.umask(0o22) | |
os.umask(result) | |
return result | |
def is_string_sequence(seq): | |
result = True | |
i = None | |
for i, s in enumerate(seq): | |
if not isinstance(s, string_types): | |
result = False | |
break | |
assert i is not None | |
return result | |
PROJECT_NAME_AND_VERSION = re.compile('([a-z0-9_]+([.-][a-z_][a-z0-9_]*)*)-' | |
'([a-z0-9_.+-]+)', re.I) | |
PYTHON_VERSION = re.compile(r'-py(\d\.?\d?)') | |
def split_filename(filename, project_name=None): | |
""" | |
Extract name, version, python version from a filename (no extension) | |
Return name, version, pyver or None | |
""" | |
result = None | |
pyver = None | |
filename = unquote(filename).replace(' ', '-') | |
m = PYTHON_VERSION.search(filename) | |
if m: | |
pyver = m.group(1) | |
filename = filename[:m.start()] | |
if project_name and len(filename) > len(project_name) + 1: | |
m = re.match(re.escape(project_name) + r'\b', filename) | |
if m: | |
n = m.end() | |
result = filename[:n], filename[n + 1:], pyver | |
if result is None: | |
m = PROJECT_NAME_AND_VERSION.match(filename) | |
if m: | |
result = m.group(1), m.group(3), pyver | |
return result | |
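
# A sketch of split_filename(), assuming the function above; the archive
# stem names are illustrative.
def _demo_split_filename():
    assert split_filename('requests-2.9.1') == ('requests', '2.9.1', None)
    assert split_filename('foo-1.0-py2.7') == ('foo', '1.0', '2.7')
    return split_filename('foo-1.0-py2.7')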
# Allow spaces in name because of legacy dists like "Twisted Core" | |
NAME_VERSION_RE = re.compile(r'(?P<name>[\w .-]+)\s*' | |
r'\(\s*(?P<ver>[^\s)]+)\)$') | |
def parse_name_and_version(p): | |
""" | |
A utility method used to get the name and version from a string, | |
e.g. from a Provides-Dist value. | |
:param p: A value in the form 'foo (1.0)' | |
:return: The name and version as a tuple. | |
""" | |
m = NAME_VERSION_RE.match(p) | |
if not m: | |
raise DistlibException('Ill-formed name/version string: \'%s\'' % p) | |
d = m.groupdict() | |
return d['name'].strip().lower(), d['ver'] | |
def get_extras(requested, available): | |
result = set() | |
requested = set(requested or []) | |
available = set(available or []) | |
if '*' in requested: | |
requested.remove('*') | |
result |= available | |
for r in requested: | |
if r == '-': | |
result.add(r) | |
elif r.startswith('-'): | |
unwanted = r[1:] | |
if unwanted not in available: | |
logger.warning('undeclared extra: %s' % unwanted) | |
if unwanted in result: | |
result.remove(unwanted) | |
else: | |
if r not in available: | |
logger.warning('undeclared extra: %s' % r) | |
result.add(r) | |
return result | |
# | |
# Extended metadata functionality | |
# | |
def _get_external_data(url): | |
result = {} | |
try: | |
# urlopen might fail if it runs into redirections, | |
# because of Python issue #13696. Fixed in locators | |
# using a custom redirect handler. | |
resp = urlopen(url) | |
headers = resp.info() | |
ct = headers.get('Content-Type') | |
if not ct.startswith('application/json'): | |
logger.debug('Unexpected response for JSON request: %s', ct) | |
else: | |
reader = codecs.getreader('utf-8')(resp) | |
#data = reader.read().decode('utf-8') | |
#result = json.loads(data) | |
result = json.load(reader) | |
except Exception as e: | |
logger.exception('Failed to get external data for %s: %s', url, e) | |
return result | |
_external_data_base_url = 'https://www.red-dove.com/pypi/projects/' | |
def get_project_data(name): | |
url = '%s/%s/project.json' % (name[0].upper(), name) | |
url = urljoin(_external_data_base_url, url) | |
result = _get_external_data(url) | |
return result | |
def get_package_data(name, version): | |
url = '%s/%s/package-%s.json' % (name[0].upper(), name, version) | |
url = urljoin(_external_data_base_url, url) | |
return _get_external_data(url) | |
class Cache(object): | |
""" | |
A class implementing a cache for resources that need to live in the file system, | |
e.g. shared libraries. This class was moved here from resources because it | |
could be used by other modules, e.g. the wheel module. | |
""" | |
def __init__(self, base): | |
""" | |
Initialise an instance. | |
:param base: The base directory where the cache should be located. | |
""" | |
# we use 'isdir' instead of 'exists', because we want to | |
# fail if there's a file with that name | |
if not os.path.isdir(base): # pragma: no cover | |
os.makedirs(base) | |
if (os.stat(base).st_mode & 0o77) != 0: | |
logger.warning('Directory \'%s\' is not private', base) | |
self.base = os.path.abspath(os.path.normpath(base)) | |
def prefix_to_dir(self, prefix): | |
""" | |
Converts a resource prefix to a directory name in the cache. | |
""" | |
return path_to_cache_dir(prefix) | |
def clear(self): | |
""" | |
Clear the cache. | |
""" | |
not_removed = [] | |
for fn in os.listdir(self.base): | |
fn = os.path.join(self.base, fn) | |
try: | |
if os.path.islink(fn) or os.path.isfile(fn): | |
os.remove(fn) | |
elif os.path.isdir(fn): | |
shutil.rmtree(fn) | |
except Exception: | |
not_removed.append(fn) | |
return not_removed | |
class EventMixin(object): | |
""" | |
A very simple publish/subscribe system. | |
""" | |
def __init__(self): | |
self._subscribers = {} | |
def add(self, event, subscriber, append=True): | |
""" | |
Add a subscriber for an event. | |
:param event: The name of an event. | |
:param subscriber: The subscriber to be added (and called when the | |
event is published). | |
:param append: Whether to append or prepend the subscriber to an | |
existing subscriber list for the event. | |
""" | |
subs = self._subscribers | |
if event not in subs: | |
subs[event] = deque([subscriber]) | |
else: | |
sq = subs[event] | |
if append: | |
sq.append(subscriber) | |
else: | |
sq.appendleft(subscriber) | |
def remove(self, event, subscriber): | |
""" | |
Remove a subscriber for an event. | |
:param event: The name of an event. | |
:param subscriber: The subscriber to be removed. | |
""" | |
subs = self._subscribers | |
if event not in subs: | |
raise ValueError('No subscribers: %r' % event) | |
subs[event].remove(subscriber) | |
def get_subscribers(self, event): | |
""" | |
Return an iterator for the subscribers for an event. | |
:param event: The event to return subscribers for. | |
""" | |
return iter(self._subscribers.get(event, ())) | |
def publish(self, event, *args, **kwargs): | |
""" | |
Publish an event and return a list of values returned by its | |
subscribers. | |
:param event: The event to publish. | |
:param args: The positional arguments to pass to the event's | |
subscribers. | |
:param kwargs: The keyword arguments to pass to the event's | |
subscribers. | |
""" | |
result = [] | |
for subscriber in self.get_subscribers(event): | |
try: | |
value = subscriber(event, *args, **kwargs) | |
except Exception: | |
logger.exception('Exception during event publication') | |
value = None | |
result.append(value) | |
logger.debug('publish %s: args = %s, kwargs = %s, result = %s', | |
event, args, kwargs, result) | |
return result | |
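
# A sketch of the EventMixin publish/subscribe flow, assuming the class above;
# the event name 'ping' and the handler are made up for illustration.
def _demo_event_mixin():
    class Emitter(EventMixin):
        pass

    def handler(event, value):
        return value * 2

    e = Emitter()
    e.add('ping', handler)            # subscribe to the 'ping' event
    results = e.publish('ping', 21)   # calls handler('ping', 21)
    assert results == [42]
    e.remove('ping', handler)         # unsubscribe again
    return results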
# | |
# Simple sequencing | |
# | |
class Sequencer(object): | |
def __init__(self): | |
self._preds = {} | |
self._succs = {} | |
self._nodes = set() # nodes with no preds/succs | |
def add_node(self, node): | |
self._nodes.add(node) | |
def remove_node(self, node, edges=False): | |
if node in self._nodes: | |
self._nodes.remove(node) | |
if edges: | |
for p in set(self._preds.get(node, ())): | |
self.remove(p, node) | |
for s in set(self._succs.get(node, ())): | |
self.remove(node, s) | |
# Remove empties | |
for k, v in list(self._preds.items()): | |
if not v: | |
del self._preds[k] | |
for k, v in list(self._succs.items()): | |
if not v: | |
del self._succs[k] | |
def add(self, pred, succ): | |
assert pred != succ | |
self._preds.setdefault(succ, set()).add(pred) | |
self._succs.setdefault(pred, set()).add(succ) | |
def remove(self, pred, succ): | |
assert pred != succ | |
try: | |
preds = self._preds[succ] | |
succs = self._succs[pred] | |
except KeyError: # pragma: no cover | |
raise ValueError('%r not a successor of anything' % succ) | |
try: | |
preds.remove(pred) | |
succs.remove(succ) | |
except KeyError: # pragma: no cover | |
raise ValueError('%r not a successor of %r' % (succ, pred)) | |
def is_step(self, step): | |
return (step in self._preds or step in self._succs or | |
step in self._nodes) | |
def get_steps(self, final): | |
if not self.is_step(final): | |
raise ValueError('Unknown: %r' % final) | |
result = [] | |
todo = [] | |
seen = set() | |
todo.append(final) | |
while todo: | |
step = todo.pop(0) | |
if step in seen: | |
# if a step was already seen, | |
# move it to the end (so it will appear earlier | |
# when reversed on return) ... but not for the | |
# final step, as that would be confusing for | |
# users | |
if step != final: | |
result.remove(step) | |
result.append(step) | |
else: | |
seen.add(step) | |
result.append(step) | |
preds = self._preds.get(step, ()) | |
todo.extend(preds) | |
return reversed(result) | |
@property | |
def strong_connections(self): | |
#http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm | |
index_counter = [0] | |
stack = [] | |
lowlinks = {} | |
index = {} | |
result = [] | |
graph = self._succs | |
def strongconnect(node): | |
# set the depth index for this node to the smallest unused index | |
index[node] = index_counter[0] | |
lowlinks[node] = index_counter[0] | |
index_counter[0] += 1 | |
stack.append(node) | |
# Consider successors | |
try: | |
successors = graph[node] | |
except Exception: | |
successors = [] | |
for successor in successors: | |
if successor not in lowlinks: | |
# Successor has not yet been visited | |
strongconnect(successor) | |
lowlinks[node] = min(lowlinks[node],lowlinks[successor]) | |
elif successor in stack: | |
# the successor is in the stack and hence in the current | |
# strongly connected component (SCC) | |
lowlinks[node] = min(lowlinks[node],index[successor]) | |
# If `node` is a root node, pop the stack and generate an SCC | |
if lowlinks[node] == index[node]: | |
connected_component = [] | |
while True: | |
successor = stack.pop() | |
connected_component.append(successor) | |
if successor == node: break | |
component = tuple(connected_component) | |
# storing the result | |
result.append(component) | |
for node in graph: | |
if node not in lowlinks: | |
strongconnect(node) | |
return result | |
@property | |
def dot(self): | |
result = ['digraph G {'] | |
for succ in self._preds: | |
preds = self._preds[succ] | |
for pred in preds: | |
result.append(' %s -> %s;' % (pred, succ)) | |
for node in self._nodes: | |
result.append(' %s;' % node) | |
result.append('}') | |
return '\n'.join(result) | |
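
# A sketch of Sequencer, assuming the class above: add(pred, succ) records
# that 'pred' must come before 'succ', and get_steps() returns the steps in
# dependency order. The step names are illustrative.
def _demo_sequencer():
    seq = Sequencer()
    seq.add('compile', 'link')        # 'compile' must precede 'link'
    seq.add('link', 'package')        # 'link' must precede 'package'
    steps = list(seq.get_steps('package'))
    assert steps == ['compile', 'link', 'package']
    return steps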
# | |
# Unarchiving functionality for zip, tar, tgz, tbz, whl | |
# | |
ARCHIVE_EXTENSIONS = ('.tar.gz', '.tar.bz2', '.tar', '.zip', | |
'.tgz', '.tbz', '.whl') | |
def unarchive(archive_filename, dest_dir, format=None, check=True): | |
def check_path(path): | |
if not isinstance(path, text_type): | |
path = path.decode('utf-8') | |
p = os.path.abspath(os.path.join(dest_dir, path)) | |
if not p.startswith(dest_dir) or p[plen] != os.sep: | |
raise ValueError('path outside destination: %r' % p) | |
dest_dir = os.path.abspath(dest_dir) | |
plen = len(dest_dir) | |
archive = None | |
if format is None: | |
if archive_filename.endswith(('.zip', '.whl')): | |
format = 'zip' | |
elif archive_filename.endswith(('.tar.gz', '.tgz')): | |
format = 'tgz' | |
mode = 'r:gz' | |
elif archive_filename.endswith(('.tar.bz2', '.tbz')): | |
format = 'tbz' | |
mode = 'r:bz2' | |
elif archive_filename.endswith('.tar'): | |
format = 'tar' | |
mode = 'r' | |
else: # pragma: no cover | |
raise ValueError('Unknown format for %r' % archive_filename) | |
try: | |
if format == 'zip': | |
archive = ZipFile(archive_filename, 'r') | |
if check: | |
names = archive.namelist() | |
for name in names: | |
check_path(name) | |
else: | |
archive = tarfile.open(archive_filename, mode) | |
if check: | |
names = archive.getnames() | |
for name in names: | |
check_path(name) | |
if format != 'zip' and sys.version_info[0] < 3: | |
# See Python issue 17153. If the dest path contains Unicode, | |
# tarfile extraction fails on Python 2.x if a member path name | |
# contains non-ASCII characters - it leads to an implicit | |
# bytes -> unicode conversion using ASCII to decode. | |
for tarinfo in archive.getmembers(): | |
if not isinstance(tarinfo.name, text_type): | |
tarinfo.name = tarinfo.name.decode('utf-8') | |
archive.extractall(dest_dir) | |
finally: | |
if archive: | |
archive.close() | |
def zip_dir(directory): | |
"""zip a directory tree into a BytesIO object""" | |
result = io.BytesIO() | |
dlen = len(directory) | |
with ZipFile(result, "w") as zf: | |
for root, dirs, files in os.walk(directory): | |
for name in files: | |
full = os.path.join(root, name) | |
rel = root[dlen:] | |
dest = os.path.join(rel, name) | |
zf.write(full, dest) | |
return result | |
# | |
# Simple progress bar | |
# | |
UNITS = ('', 'K', 'M', 'G','T','P') | |
class Progress(object): | |
unknown = 'UNKNOWN' | |
def __init__(self, minval=0, maxval=100): | |
assert maxval is None or maxval >= minval | |
self.min = self.cur = minval | |
self.max = maxval | |
self.started = None | |
self.elapsed = 0 | |
self.done = False | |
def update(self, curval): | |
assert self.min <= curval | |
assert self.max is None or curval <= self.max | |
self.cur = curval | |
now = time.time() | |
if self.started is None: | |
self.started = now | |
else: | |
self.elapsed = now - self.started | |
def increment(self, incr): | |
assert incr >= 0 | |
self.update(self.cur + incr) | |
def start(self): | |
self.update(self.min) | |
return self | |
def stop(self): | |
if self.max is not None: | |
self.update(self.max) | |
self.done = True | |
@property | |
def maximum(self): | |
return self.unknown if self.max is None else self.max | |
@property | |
def percentage(self): | |
if self.done: | |
result = '100 %' | |
elif self.max is None: | |
result = ' ?? %' | |
else: | |
v = 100.0 * (self.cur - self.min) / (self.max - self.min) | |
result = '%3d %%' % v | |
return result | |
def format_duration(self, duration): | |
if (duration <= 0) and self.max is None or self.cur == self.min: | |
result = '??:??:??' | |
#elif duration < 1: | |
# result = '--:--:--' | |
else: | |
result = time.strftime('%H:%M:%S', time.gmtime(duration)) | |
return result | |
@property | |
def ETA(self): | |
if self.done: | |
prefix = 'Done' | |
t = self.elapsed | |
#import pdb; pdb.set_trace() | |
else: | |
prefix = 'ETA ' | |
if self.max is None: | |
t = -1 | |
elif self.elapsed == 0 or (self.cur == self.min): | |
t = 0 | |
else: | |
#import pdb; pdb.set_trace() | |
t = float(self.max - self.min) | |
t /= self.cur - self.min | |
t = (t - 1) * self.elapsed | |
return '%s: %s' % (prefix, self.format_duration(t)) | |
@property | |
def speed(self): | |
if self.elapsed == 0: | |
result = 0.0 | |
else: | |
result = (self.cur - self.min) / self.elapsed | |
for unit in UNITS: | |
if result < 1000: | |
break | |
result /= 1000.0 | |
return '%d %sB/s' % (result, unit) | |
# | |
# Glob functionality | |
# | |
RICH_GLOB = re.compile(r'\{([^}]*)\}') | |
_CHECK_RECURSIVE_GLOB = re.compile(r'[^/\\,{]\*\*|\*\*[^/\\,}]') | |
_CHECK_MISMATCH_SET = re.compile(r'^[^{]*\}|\{[^}]*$') | |
def iglob(path_glob): | |
"""Extended globbing function that supports ** and {opt1,opt2,opt3}.""" | |
if _CHECK_RECURSIVE_GLOB.search(path_glob): | |
msg = """invalid glob %r: recursive glob "**" must be used alone""" | |
raise ValueError(msg % path_glob) | |
if _CHECK_MISMATCH_SET.search(path_glob): | |
msg = """invalid glob %r: mismatching set marker '{' or '}'""" | |
raise ValueError(msg % path_glob) | |
return _iglob(path_glob) | |
def _iglob(path_glob): | |
rich_path_glob = RICH_GLOB.split(path_glob, 1) | |
if len(rich_path_glob) > 1: | |
assert len(rich_path_glob) == 3, rich_path_glob | |
prefix, set, suffix = rich_path_glob | |
for item in set.split(','): | |
for path in _iglob(''.join((prefix, item, suffix))): | |
yield path | |
else: | |
if '**' not in path_glob: | |
for item in std_iglob(path_glob): | |
yield item | |
else: | |
prefix, radical = path_glob.split('**', 1) | |
if prefix == '': | |
prefix = '.' | |
if radical == '': | |
radical = '*' | |
else: | |
# we support both '/' and '\\' as separators after '**' | |
radical = radical.lstrip('/') | |
radical = radical.lstrip('\\') | |
for path, dir, files in os.walk(prefix): | |
path = os.path.normpath(path) | |
for fn in _iglob(os.path.join(path, radical)): | |
yield fn | |
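
# A sketch of the extended glob syntax handled by iglob(), assuming the
# function above; the patterns are illustrative.
def _demo_iglob():
    # Brace sets expand to alternatives, so this pattern searches both
    # 'src/foo/*.py' and 'src/bar/*.py'. A '**' component (which must stand
    # alone, e.g. 'docs/**/*.rst') recurses into subdirectories via os.walk.
    return list(iglob('src/{foo,bar}/*.py'))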
if ssl: | |
from .compat import (HTTPSHandler as BaseHTTPSHandler, match_hostname, | |
CertificateError) | |
# | |
# HTTPSConnection which verifies certificates/matches domains | |
# | |
class HTTPSConnection(httplib.HTTPSConnection): | |
ca_certs = None # set this to the path to the certs file (.pem) | |
check_domain = True # only used if ca_certs is not None | |
# noinspection PyPropertyAccess | |
def connect(self): | |
sock = socket.create_connection((self.host, self.port), self.timeout) | |
if getattr(self, '_tunnel_host', False): | |
self.sock = sock | |
self._tunnel() | |
if not hasattr(ssl, 'SSLContext'): | |
# For 2.x | |
if self.ca_certs: | |
cert_reqs = ssl.CERT_REQUIRED | |
else: | |
cert_reqs = ssl.CERT_NONE | |
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, | |
cert_reqs=cert_reqs, | |
ssl_version=ssl.PROTOCOL_SSLv23, | |
ca_certs=self.ca_certs) | |
else: # pragma: no cover | |
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) | |
context.options |= ssl.OP_NO_SSLv2 | |
if self.cert_file: | |
context.load_cert_chain(self.cert_file, self.key_file) | |
kwargs = {} | |
if self.ca_certs: | |
context.verify_mode = ssl.CERT_REQUIRED | |
context.load_verify_locations(cafile=self.ca_certs) | |
if getattr(ssl, 'HAS_SNI', False): | |
kwargs['server_hostname'] = self.host | |
self.sock = context.wrap_socket(sock, **kwargs) | |
if self.ca_certs and self.check_domain: | |
try: | |
match_hostname(self.sock.getpeercert(), self.host) | |
logger.debug('Host verified: %s', self.host) | |
except CertificateError: # pragma: no cover | |
self.sock.shutdown(socket.SHUT_RDWR) | |
self.sock.close() | |
raise | |
class HTTPSHandler(BaseHTTPSHandler): | |
def __init__(self, ca_certs, check_domain=True): | |
BaseHTTPSHandler.__init__(self) | |
self.ca_certs = ca_certs | |
self.check_domain = check_domain | |
def _conn_maker(self, *args, **kwargs): | |
""" | |
This is called to create a connection instance. Normally you'd | |
pass a connection class to do_open, but it doesn't actually check for | |
a class, and just expects a callable. As long as we behave just as a | |
constructor would have, we should be OK. If it ever changes so that | |
we *must* pass a class, we'll create an UnsafeHTTPSConnection class | |
which just sets check_domain to False in the class definition, and | |
choose which one to pass to do_open. | |
""" | |
result = HTTPSConnection(*args, **kwargs) | |
if self.ca_certs: | |
result.ca_certs = self.ca_certs | |
result.check_domain = self.check_domain | |
return result | |
def https_open(self, req): | |
try: | |
return self.do_open(self._conn_maker, req) | |
except URLError as e: | |
if 'certificate verify failed' in str(e.reason): | |
raise CertificateError('Unable to verify server certificate ' | |
'for %s' % req.host) | |
else: | |
raise | |
# | |
# To guard against mixing HTTP traffic with HTTPS (examples: a Man-In-The- | |
# Middle proxy using HTTP listens on port 443, or an index mistakenly serves | |
# HTML containing an http://xyz link when it should be https://xyz), | |
# you can use the following handler class, which does not allow HTTP traffic. | |
# | |
# It works by inheriting from HTTPHandler - so build_opener won't add a | |
# handler for HTTP itself. | |
# | |
class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler): | |
def http_open(self, req): | |
raise URLError('Unexpected HTTP request on what should be a secure ' | |
'connection: %s' % req) | |
# | |
# XML-RPC with timeouts | |
# | |
_ver_info = sys.version_info[:2] | |
if _ver_info == (2, 6): | |
class HTTP(httplib.HTTP): | |
def __init__(self, host='', port=None, **kwargs): | |
if port == 0: # 0 means use port 0, not the default port | |
port = None | |
self._setup(self._connection_class(host, port, **kwargs)) | |
if ssl: | |
class HTTPS(httplib.HTTPS): | |
def __init__(self, host='', port=None, **kwargs): | |
if port == 0: # 0 means use port 0, not the default port | |
port = None | |
self._setup(self._connection_class(host, port, **kwargs)) | |
class Transport(xmlrpclib.Transport): | |
def __init__(self, timeout, use_datetime=0): | |
self.timeout = timeout | |
xmlrpclib.Transport.__init__(self, use_datetime) | |
def make_connection(self, host): | |
h, eh, x509 = self.get_host_info(host) | |
if _ver_info == (2, 6): | |
result = HTTP(h, timeout=self.timeout) | |
else: | |
if not self._connection or host != self._connection[0]: | |
self._extra_headers = eh | |
self._connection = host, httplib.HTTPConnection(h) | |
result = self._connection[1] | |
return result | |
if ssl: | |
class SafeTransport(xmlrpclib.SafeTransport): | |
def __init__(self, timeout, use_datetime=0): | |
self.timeout = timeout | |
xmlrpclib.SafeTransport.__init__(self, use_datetime) | |
def make_connection(self, host): | |
h, eh, kwargs = self.get_host_info(host) | |
if not kwargs: | |
kwargs = {} | |
kwargs['timeout'] = self.timeout | |
if _ver_info == (2, 6): | |
result = HTTPS(host, None, **kwargs) | |
else: | |
if not self._connection or host != self._connection[0]: | |
self._extra_headers = eh | |
self._connection = host, httplib.HTTPSConnection(h, None, | |
**kwargs) | |
result = self._connection[1] | |
return result | |
class ServerProxy(xmlrpclib.ServerProxy): | |
def __init__(self, uri, **kwargs): | |
self.timeout = timeout = kwargs.pop('timeout', None) | |
# The above classes only come into play if a timeout | |
# is specified | |
if timeout is not None: | |
scheme, _ = splittype(uri) | |
use_datetime = kwargs.get('use_datetime', 0) | |
if scheme == 'https': | |
tcls = SafeTransport | |
else: | |
tcls = Transport | |
kwargs['transport'] = t = tcls(timeout, use_datetime=use_datetime) | |
self.transport = t | |
xmlrpclib.ServerProxy.__init__(self, uri, **kwargs) | |
# | |
# CSV functionality. This is provided because on 2.x, the csv module can't | |
# handle Unicode. However, we need to deal with Unicode in e.g. RECORD files. | |
# | |
def _csv_open(fn, mode, **kwargs): | |
if sys.version_info[0] < 3: | |
mode += 'b' | |
else: | |
kwargs['newline'] = '' | |
return open(fn, mode, **kwargs) | |
class CSVBase(object): | |
defaults = { | |
'delimiter': str(','), # The strs are used because we need native | |
'quotechar': str('"'), # str in the csv API (2.x won't take | |
'lineterminator': str('\n') # Unicode) | |
} | |
def __enter__(self): | |
return self | |
def __exit__(self, *exc_info): | |
self.stream.close() | |
class CSVReader(CSVBase): | |
def __init__(self, **kwargs): | |
if 'stream' in kwargs: | |
stream = kwargs['stream'] | |
if sys.version_info[0] >= 3: | |
# needs to be a text stream | |
stream = codecs.getreader('utf-8')(stream) | |
self.stream = stream | |
else: | |
self.stream = _csv_open(kwargs['path'], 'r') | |
self.reader = csv.reader(self.stream, **self.defaults) | |
def __iter__(self): | |
return self | |
def next(self): | |
result = next(self.reader) | |
if sys.version_info[0] < 3: | |
for i, item in enumerate(result): | |
if not isinstance(item, text_type): | |
result[i] = item.decode('utf-8') | |
return result | |
__next__ = next | |
class CSVWriter(CSVBase): | |
def __init__(self, fn, **kwargs): | |
self.stream = _csv_open(fn, 'w') | |
self.writer = csv.writer(self.stream, **self.defaults) | |
def writerow(self, row): | |
if sys.version_info[0] < 3: | |
r = [] | |
for item in row: | |
if isinstance(item, text_type): | |
item = item.encode('utf-8') | |
r.append(item) | |
row = r | |
self.writer.writerow(row) | |
# | |
# Configurator functionality | |
# | |
class Configurator(BaseConfigurator): | |
value_converters = dict(BaseConfigurator.value_converters) | |
value_converters['inc'] = 'inc_convert' | |
def __init__(self, config, base=None): | |
super(Configurator, self).__init__(config) | |
self.base = base or os.getcwd() | |
def configure_custom(self, config): | |
def convert(o): | |
if isinstance(o, (list, tuple)): | |
result = type(o)([convert(i) for i in o]) | |
elif isinstance(o, dict): | |
if '()' in o: | |
result = self.configure_custom(o) | |
else: | |
result = {} | |
for k in o: | |
result[k] = convert(o[k]) | |
else: | |
result = self.convert(o) | |
return result | |
c = config.pop('()') | |
if not callable(c): | |
c = self.resolve(c) | |
props = config.pop('.', None) | |
# Check for valid identifiers | |
args = config.pop('[]', ()) | |
if args: | |
args = tuple([convert(o) for o in args]) | |
items = [(k, convert(config[k])) for k in config if valid_ident(k)] | |
kwargs = dict(items) | |
result = c(*args, **kwargs) | |
if props: | |
for n, v in props.items(): | |
setattr(result, n, convert(v)) | |
return result | |
def __getitem__(self, key): | |
result = self.config[key] | |
if isinstance(result, dict) and '()' in result: | |
self.config[key] = result = self.configure_custom(result) | |
return result | |
def inc_convert(self, value): | |
"""Default converter for the inc:// protocol.""" | |
if not os.path.isabs(value): | |
value = os.path.join(self.base, value) | |
with codecs.open(value, 'r', encoding='utf-8') as f: | |
result = json.load(f) | |
return result | |
# | |
# Mixin for running subprocesses and capturing their output | |
# | |
class SubprocessMixin(object): | |
def __init__(self, verbose=False, progress=None): | |
self.verbose = verbose | |
self.progress = progress | |
def reader(self, stream, context): | |
""" | |
Read lines from a subprocess' output stream and either pass to a progress | |
callable (if specified) or write progress information to sys.stderr. | |
""" | |
progress = self.progress | |
verbose = self.verbose | |
while True: | |
s = stream.readline() | |
if not s: | |
break | |
if progress is not None: | |
progress(s, context) | |
else: | |
if not verbose: | |
sys.stderr.write('.') | |
else: | |
sys.stderr.write(s.decode('utf-8')) | |
sys.stderr.flush() | |
stream.close() | |
def run_command(self, cmd, **kwargs): | |
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, | |
stderr=subprocess.PIPE, **kwargs) | |
t1 = threading.Thread(target=self.reader, args=(p.stdout, 'stdout')) | |
t1.start() | |
t2 = threading.Thread(target=self.reader, args=(p.stderr, 'stderr')) | |
t2.start() | |
p.wait() | |
t1.join() | |
t2.join() | |
if self.progress is not None: | |
self.progress('done.', 'main') | |
elif self.verbose: | |
sys.stderr.write('done.\n') | |
return p | |
def normalize_name(name): | |
"""Normalize a python package name a la PEP 503""" | |
# https://www.python.org/dev/peps/pep-0503/#normalized-names | |
return re.sub('[-_.]+', '-', name).lower() |
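
# A sketch of PEP 503 name normalisation, assuming normalize_name() above:
# runs of '-', '_' and '.' collapse to a single '-' and the result is
# lower-cased.
def _demo_normalize_name():
    assert normalize_name('Friendly_Bard.utils') == 'friendly-bard-utils'
    assert normalize_name('requests') == 'requests'
    return normalize_name('Friendly_Bard.utils')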
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2012-2016 The Python Software Foundation. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
""" | |
Implementation of a flexible versioning scheme providing support for PEP-440, | |
setuptools-compatible and semantic versioning. | |
""" | |
import logging | |
import re | |
from .compat import string_types | |
__all__ = ['NormalizedVersion', 'NormalizedMatcher', | |
'LegacyVersion', 'LegacyMatcher', | |
'SemanticVersion', 'SemanticMatcher', | |
'UnsupportedVersionError', 'get_scheme'] | |
logger = logging.getLogger(__name__) | |
class UnsupportedVersionError(ValueError): | |
"""This is an unsupported version.""" | |
pass | |
class Version(object): | |
def __init__(self, s): | |
self._string = s = s.strip() | |
self._parts = parts = self.parse(s) | |
assert isinstance(parts, tuple) | |
assert len(parts) > 0 | |
def parse(self, s): | |
raise NotImplementedError('please implement in a subclass') | |
def _check_compatible(self, other): | |
if type(self) != type(other): | |
raise TypeError('cannot compare %r and %r' % (self, other)) | |
def __eq__(self, other): | |
self._check_compatible(other) | |
return self._parts == other._parts | |
def __ne__(self, other): | |
return not self.__eq__(other) | |
def __lt__(self, other): | |
self._check_compatible(other) | |
return self._parts < other._parts | |
def __gt__(self, other): | |
return not (self.__lt__(other) or self.__eq__(other)) | |
def __le__(self, other): | |
return self.__lt__(other) or self.__eq__(other) | |
def __ge__(self, other): | |
return self.__gt__(other) or self.__eq__(other) | |
# See http://docs.python.org/reference/datamodel#object.__hash__ | |
def __hash__(self): | |
return hash(self._parts) | |
def __repr__(self): | |
return "%s('%s')" % (self.__class__.__name__, self._string) | |
def __str__(self): | |
return self._string | |
@property | |
def is_prerelease(self): | |
raise NotImplementedError('Please implement in subclasses.') | |
class Matcher(object): | |
version_class = None | |
dist_re = re.compile(r"^(\w[\s\w'.-]*)(\((.*)\))?") | |
comp_re = re.compile(r'^(<=|>=|<|>|!=|={2,3}|~=)?\s*([^\s,]+)$') | |
num_re = re.compile(r'^\d+(\.\d+)*$') | |
# value is either a callable or the name of a method | |
_operators = { | |
'<': lambda v, c, p: v < c, | |
'>': lambda v, c, p: v > c, | |
'<=': lambda v, c, p: v == c or v < c, | |
'>=': lambda v, c, p: v == c or v > c, | |
'==': lambda v, c, p: v == c, | |
'===': lambda v, c, p: v == c, | |
# by default, compatible => >=. | |
'~=': lambda v, c, p: v == c or v > c, | |
'!=': lambda v, c, p: v != c, | |
} | |
def __init__(self, s): | |
if self.version_class is None: | |
raise ValueError('Please specify a version class') | |
self._string = s = s.strip() | |
m = self.dist_re.match(s) | |
if not m: | |
raise ValueError('Not valid: %r' % s) | |
groups = m.groups('') | |
self.name = groups[0].strip() | |
self.key = self.name.lower() # for case-insensitive comparisons | |
clist = [] | |
if groups[2]: | |
constraints = [c.strip() for c in groups[2].split(',')] | |
for c in constraints: | |
m = self.comp_re.match(c) | |
if not m: | |
raise ValueError('Invalid %r in %r' % (c, s)) | |
groups = m.groups() | |
op = groups[0] or '~=' | |
s = groups[1] | |
if s.endswith('.*'): | |
if op not in ('==', '!='): | |
raise ValueError('\'.*\' not allowed for ' | |
'%r constraints' % op) | |
# Could be a partial version (e.g. for '2.*') which | |
# won't parse as a version, so keep it as a string | |
vn, prefix = s[:-2], True | |
if not self.num_re.match(vn): | |
# Just to check that vn is a valid version | |
self.version_class(vn) | |
else: | |
# Should parse as a version, so we can create an | |
# instance for the comparison | |
vn, prefix = self.version_class(s), False | |
clist.append((op, vn, prefix)) | |
self._parts = tuple(clist) | |
def match(self, version): | |
""" | |
Check if the provided version matches the constraints. | |
:param version: The version to match against this instance. | |
:type version: String or :class:`Version` instance. | |
""" | |
if isinstance(version, string_types): | |
version = self.version_class(version) | |
for operator, constraint, prefix in self._parts: | |
f = self._operators.get(operator) | |
if isinstance(f, string_types): | |
f = getattr(self, f) | |
if not f: | |
msg = ('%r not implemented ' | |
'for %s' % (operator, self.__class__.__name__)) | |
raise NotImplementedError(msg) | |
if not f(version, constraint, prefix): | |
return False | |
return True | |
@property | |
def exact_version(self): | |
result = None | |
if len(self._parts) == 1 and self._parts[0][0] in ('==', '==='): | |
result = self._parts[0][1] | |
return result | |
def _check_compatible(self, other): | |
if type(self) != type(other) or self.name != other.name: | |
raise TypeError('cannot compare %s and %s' % (self, other)) | |
def __eq__(self, other): | |
self._check_compatible(other) | |
return self.key == other.key and self._parts == other._parts | |
def __ne__(self, other): | |
return not self.__eq__(other) | |
# See http://docs.python.org/reference/datamodel#object.__hash__ | |
def __hash__(self): | |
return hash(self.key) + hash(self._parts) | |
def __repr__(self): | |
return "%s(%r)" % (self.__class__.__name__, self._string) | |
def __str__(self): | |
return self._string | |
PEP440_VERSION_RE = re.compile(r'^v?(\d+!)?(\d+(\.\d+)*)((a|b|c|rc)(\d+))?' | |
r'(\.(post)(\d+))?(\.(dev)(\d+))?' | |
r'(\+([a-zA-Z\d]+(\.[a-zA-Z\d]+)?))?$') | |
def _pep_440_key(s): | |
s = s.strip() | |
m = PEP440_VERSION_RE.match(s) | |
if not m: | |
raise UnsupportedVersionError('Not a valid version: %s' % s) | |
groups = m.groups() | |
nums = tuple(int(v) for v in groups[1].split('.')) | |
while len(nums) > 1 and nums[-1] == 0: | |
nums = nums[:-1] | |
if not groups[0]: | |
epoch = 0 | |
else: | |
epoch = int(groups[0][:-1])  # strip the trailing '!' from the epoch group
pre = groups[4:6] | |
post = groups[7:9] | |
dev = groups[10:12] | |
local = groups[13] | |
if pre == (None, None): | |
pre = () | |
else: | |
pre = pre[0], int(pre[1]) | |
if post == (None, None): | |
post = () | |
else: | |
post = post[0], int(post[1]) | |
if dev == (None, None): | |
dev = () | |
else: | |
dev = dev[0], int(dev[1]) | |
if local is None: | |
local = () | |
else: | |
parts = [] | |
for part in local.split('.'): | |
# to ensure that numeric parts compare as greater than lexicographic
# ones, avoid comparing them directly; instead encode each part as a
# tuple that ensures the correct sort order
if part.isdigit(): | |
part = (1, int(part)) | |
else: | |
part = (0, part) | |
parts.append(part) | |
local = tuple(parts) | |
if not pre: | |
# either before pre-release, or final release and after | |
if not post and dev: | |
# before pre-release | |
pre = ('a', -1) # to sort before a0 | |
else: | |
pre = ('z',) # to sort after all pre-releases | |
# now look at the state of post and dev. | |
if not post: | |
post = ('_',) # sort before 'a' | |
if not dev: | |
dev = ('final',) | |
#print('%s -> %s' % (s, m.groups())) | |
return epoch, nums, pre, post, dev, local | |
_normalized_key = _pep_440_key | |
class NormalizedVersion(Version): | |
"""A rational version. | |
Good: | |
1.2 # equivalent to "1.2.0" | |
1.2.0 | |
1.2a1 | |
1.2.3a2 | |
1.2.3b1 | |
1.2.3c1 | |
1.2.3.4 | |
TODO: fill this out | |
Bad: | |
1 # minimum two numbers | |
1.2a # release level must have a release serial | |
1.2.3b | |
""" | |
def parse(self, s): | |
result = _normalized_key(s) | |
# _normalized_key loses trailing zeroes in the release | |
# clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0 | |
# However, PEP 440 prefix matching needs it: for example, | |
# (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0). | |
m = PEP440_VERSION_RE.match(s) # must succeed | |
groups = m.groups() | |
self._release_clause = tuple(int(v) for v in groups[1].split('.')) | |
return result | |
PREREL_TAGS = set(['a', 'b', 'c', 'rc', 'dev']) | |
@property | |
def is_prerelease(self): | |
return any(t[0] in self.PREREL_TAGS for t in self._parts if t) | |
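
# Illustrative sketch (not part of distlib): PEP 440 ordering as implemented
# by NormalizedVersion -- dev < pre-release < final < post, and '1.2' == '1.2.0'.
def _example_normalized_ordering():
    versions = ['1.2.post1', '1.2', '1.2a1', '1.2.dev1', '1.2rc1']
    ordered = sorted(versions, key=NormalizedVersion)
    assert ordered == ['1.2.dev1', '1.2a1', '1.2rc1', '1.2', '1.2.post1']
    assert NormalizedVersion('1.2') == NormalizedVersion('1.2.0')
    assert NormalizedVersion('1.2a1').is_prerelease
    return ordered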
def _match_prefix(x, y): | |
x = str(x) | |
y = str(y) | |
if x == y: | |
return True | |
if not x.startswith(y): | |
return False | |
n = len(y) | |
return x[n] == '.' | |
class NormalizedMatcher(Matcher): | |
version_class = NormalizedVersion | |
# value is either a callable or the name of a method | |
_operators = { | |
'~=': '_match_compatible', | |
'<': '_match_lt', | |
'>': '_match_gt', | |
'<=': '_match_le', | |
'>=': '_match_ge', | |
'==': '_match_eq', | |
'===': '_match_arbitrary', | |
'!=': '_match_ne', | |
} | |
def _adjust_local(self, version, constraint, prefix): | |
if prefix: | |
strip_local = '+' not in constraint and version._parts[-1] | |
else: | |
# both constraint and version are | |
# NormalizedVersion instances. | |
# If constraint does not have a local component, | |
# ensure the version doesn't, either. | |
strip_local = not constraint._parts[-1] and version._parts[-1] | |
if strip_local: | |
s = version._string.split('+', 1)[0] | |
version = self.version_class(s) | |
return version, constraint | |
def _match_lt(self, version, constraint, prefix): | |
version, constraint = self._adjust_local(version, constraint, prefix) | |
if version >= constraint: | |
return False | |
release_clause = constraint._release_clause | |
pfx = '.'.join([str(i) for i in release_clause]) | |
return not _match_prefix(version, pfx) | |
def _match_gt(self, version, constraint, prefix): | |
version, constraint = self._adjust_local(version, constraint, prefix) | |
if version <= constraint: | |
return False | |
release_clause = constraint._release_clause | |
pfx = '.'.join([str(i) for i in release_clause]) | |
return not _match_prefix(version, pfx) | |
def _match_le(self, version, constraint, prefix): | |
version, constraint = self._adjust_local(version, constraint, prefix) | |
return version <= constraint | |
def _match_ge(self, version, constraint, prefix): | |
version, constraint = self._adjust_local(version, constraint, prefix) | |
return version >= constraint | |
def _match_eq(self, version, constraint, prefix): | |
version, constraint = self._adjust_local(version, constraint, prefix) | |
if not prefix: | |
result = (version == constraint) | |
else: | |
result = _match_prefix(version, constraint) | |
return result | |
def _match_arbitrary(self, version, constraint, prefix): | |
return str(version) == str(constraint) | |
def _match_ne(self, version, constraint, prefix): | |
version, constraint = self._adjust_local(version, constraint, prefix) | |
if not prefix: | |
result = (version != constraint) | |
else: | |
result = not _match_prefix(version, constraint) | |
return result | |
def _match_compatible(self, version, constraint, prefix): | |
version, constraint = self._adjust_local(version, constraint, prefix) | |
if version == constraint: | |
return True | |
if version < constraint: | |
return False | |
# if not prefix: | |
# return True | |
release_clause = constraint._release_clause | |
if len(release_clause) > 1: | |
release_clause = release_clause[:-1] | |
pfx = '.'.join([str(i) for i in release_clause]) | |
return _match_prefix(version, pfx) | |
_REPLACEMENTS = ( | |
(re.compile('[.+-]$'), ''), # remove trailing puncts | |
(re.compile(r'^[.](\d)'), r'0.\1'), # .N -> 0.N at start | |
(re.compile('^[.-]'), ''), # remove leading puncts | |
(re.compile(r'^\((.*)\)$'), r'\1'), # remove parentheses | |
(re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'), # remove leading v(ersion) | |
(re.compile(r'^r(ev)?\s*(\d+)'), r'\2'),   # remove leading r(ev)
(re.compile('[.]{2,}'), '.'), # multiple runs of '.' | |
(re.compile(r'\b(alfa|apha)\b'), 'alpha'), # misspelt alpha | |
(re.compile(r'\b(pre-alpha|prealpha)\b'), | |
'pre.alpha'), # standardise | |
(re.compile(r'\(beta\)$'), 'beta'), # remove parentheses | |
) | |
_SUFFIX_REPLACEMENTS = ( | |
(re.compile('^[:~._+-]+'), ''), # remove leading puncts | |
(re.compile('[,*")([\]]'), ''), # remove unwanted chars | |
(re.compile('[~:+_ -]'), '.'), # replace illegal chars | |
(re.compile('[.]{2,}'), '.'), # multiple runs of '.' | |
(re.compile(r'\.$'), ''), # trailing '.' | |
) | |
_NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)') | |
def _suggest_semantic_version(s): | |
""" | |
Try to suggest a semantic form for a version for which | |
_suggest_normalized_version couldn't come up with anything. | |
""" | |
result = s.strip().lower() | |
for pat, repl in _REPLACEMENTS: | |
result = pat.sub(repl, result) | |
if not result: | |
result = '0.0.0' | |
# Now look for numeric prefix, and separate it out from | |
# the rest. | |
#import pdb; pdb.set_trace() | |
m = _NUMERIC_PREFIX.match(result) | |
if not m: | |
prefix = '0.0.0' | |
suffix = result | |
else: | |
prefix = m.groups()[0].split('.') | |
prefix = [int(i) for i in prefix] | |
while len(prefix) < 3: | |
prefix.append(0) | |
if len(prefix) == 3: | |
suffix = result[m.end():] | |
else: | |
suffix = '.'.join([str(i) for i in prefix[3:]]) + result[m.end():] | |
prefix = prefix[:3] | |
prefix = '.'.join([str(i) for i in prefix]) | |
suffix = suffix.strip() | |
if suffix: | |
#import pdb; pdb.set_trace() | |
# massage the suffix. | |
for pat, repl in _SUFFIX_REPLACEMENTS: | |
suffix = pat.sub(repl, suffix) | |
if not suffix: | |
result = prefix | |
else: | |
sep = '-' if 'dev' in suffix else '+' | |
result = prefix + sep + suffix | |
if not is_semver(result): | |
result = None | |
return result | |
def _suggest_normalized_version(s): | |
"""Suggest a normalized version close to the given version string. | |
If you have a version string that isn't rational (i.e. NormalizedVersion | |
doesn't like it) then you might be able to get an equivalent (or close) | |
rational version from this function. | |
This does a number of simple normalizations to the given string, based | |
on observation of versions currently in use on PyPI. Given a dump of | |
those versions during PyCon 2009, 4287 of them:
- 2312 (53.93%) match NormalizedVersion without change
- 3474 (81.04%) match when using the automatic suggestion from this method
@param s {str} An irrational version string.
@returns A rational version string, or None if one couldn't be determined.
""" | |
try: | |
_normalized_key(s) | |
return s # already rational | |
except UnsupportedVersionError: | |
pass | |
rs = s.lower() | |
# part of this could use maketrans | |
for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'), | |
('beta', 'b'), ('rc', 'c'), ('-final', ''), | |
('-pre', 'c'), | |
('-release', ''), ('.release', ''), ('-stable', ''), | |
('+', '.'), ('_', '.'), (' ', ''), ('.final', ''), | |
('final', '')): | |
rs = rs.replace(orig, repl) | |
# if something ends with dev or pre, we add a 0 | |
rs = re.sub(r"pre$", r"pre0", rs) | |
rs = re.sub(r"dev$", r"dev0", rs) | |
# if we have something like "b-2" or "a.2" at the end of the | |
# version, that is probably beta, alpha, etc | |
# let's remove the dash or dot | |
rs = re.sub(r"([abc]|rc)[\-\.](\d+)$", r"\1\2", rs) | |
# 1.0-dev-r371 -> 1.0.dev371 | |
# 0.1-dev-r79 -> 0.1.dev79 | |
rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs) | |
# Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1 | |
rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs) | |
# Clean: v0.3, v1.0 | |
if rs.startswith('v'): | |
rs = rs[1:] | |
# Clean leading '0's on numbers. | |
#TODO: unintended side-effect on, e.g., "2003.05.09" | |
# PyPI stats: 77 (~2%) better | |
rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs) | |
# Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers | |
# zero. | |
# PyPI stats: 245 (7.56%) better | |
rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs) | |
# the 'dev-rNNN' tag is a dev tag | |
rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs) | |
# clean the - when used as a pre delimiter | |
rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs) | |
# a terminal "dev" or "devel" can be changed into ".dev0" | |
rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs) | |
# a terminal "dev" can be changed into ".dev0" | |
rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs) | |
# a terminal "final" or "stable" can be removed | |
rs = re.sub(r"(final|stable)$", "", rs) | |
# The 'r' and the '-' tags are post release tags | |
# 0.4a1.r10 -> 0.4a1.post10 | |
# 0.9.33-17222 -> 0.9.33.post17222 | |
# 0.9.33-r17222 -> 0.9.33.post17222 | |
rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs) | |
# Clean 'r' instead of 'dev' usage: | |
# 0.9.33+r17222 -> 0.9.33.dev17222 | |
# 1.0dev123 -> 1.0.dev123 | |
# 1.0.git123 -> 1.0.dev123 | |
# 1.0.bzr123 -> 1.0.dev123 | |
# 0.1a0dev.123 -> 0.1a0.dev123 | |
# PyPI stats: ~150 (~4%) better | |
rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs) | |
# Clean '.pre' (normalized from '-pre' above) instead of 'c' usage: | |
# 0.2.pre1 -> 0.2c1 | |
# 0.2-c1 -> 0.2c1 | |
# 1.0preview123 -> 1.0c123 | |
# PyPI stats: ~21 (0.62%) better | |
rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs) | |
# Tcl/Tk uses "px" for their post release markers | |
rs = re.sub(r"p(\d+)$", r".post\1", rs) | |
try: | |
_normalized_key(rs) | |
except UnsupportedVersionError: | |
rs = None | |
return rs | |
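
# Sketch (not part of distlib): a few representative rewrites this suggester
# performs; the expected outputs below follow from the substitutions above and
# the examples given in the comments, and are assumptions for illustration.
def _example_suggestions():
    samples = {
        '1.0-dev-r371': '1.0.dev371',       # dev-rNNN becomes a dev tag
        '0.9.33-r17222': '0.9.33.post17222',  # -rNNN becomes a post release
        '1.0a': '1.0a0',                    # missing pre-release serial added
        'v0.3': '0.3',                      # leading 'v' stripped
    }
    for raw, expected in samples.items():
        assert _suggest_normalized_version(raw) == expected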
# | |
# Legacy version processing (distribute-compatible) | |
# | |
_VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I) | |
_VERSION_REPLACE = { | |
'pre': 'c', | |
'preview': 'c', | |
'-': 'final-', | |
'rc': 'c', | |
'dev': '@', | |
'': None, | |
'.': None, | |
} | |
def _legacy_key(s): | |
def get_parts(s): | |
result = [] | |
for p in _VERSION_PART.split(s.lower()): | |
p = _VERSION_REPLACE.get(p, p) | |
if p: | |
if '0' <= p[:1] <= '9': | |
p = p.zfill(8) | |
else: | |
p = '*' + p | |
result.append(p) | |
result.append('*final') | |
return result | |
result = [] | |
for p in get_parts(s): | |
if p.startswith('*'): | |
if p < '*final': | |
while result and result[-1] == '*final-': | |
result.pop() | |
while result and result[-1] == '00000000': | |
result.pop() | |
result.append(p) | |
return tuple(result) | |
class LegacyVersion(Version): | |
def parse(self, s): | |
return _legacy_key(s) | |
@property | |
def is_prerelease(self): | |
result = False | |
for x in self._parts: | |
if (isinstance(x, string_types) and x.startswith('*') and | |
x < '*final'): | |
result = True | |
break | |
return result | |
class LegacyMatcher(Matcher): | |
version_class = LegacyVersion | |
_operators = dict(Matcher._operators) | |
_operators['~='] = '_match_compatible' | |
numeric_re = re.compile(r'^(\d+(\.\d+)*)')
def _match_compatible(self, version, constraint, prefix): | |
if version < constraint: | |
return False | |
m = self.numeric_re.match(str(constraint)) | |
if not m: | |
logger.warning('Cannot compute compatible match for version %s ' | |
'and constraint %s', version, constraint)
return True | |
s = m.groups()[0] | |
if '.' in s: | |
s = s.rsplit('.', 1)[0] | |
return _match_prefix(version, s) | |
# | |
# Semantic versioning | |
# | |
_SEMVER_RE = re.compile(r'^(\d+)\.(\d+)\.(\d+)' | |
r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?' | |
r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$', re.I) | |
def is_semver(s): | |
return _SEMVER_RE.match(s) | |
def _semantic_key(s): | |
def make_tuple(s, absent): | |
if s is None: | |
result = (absent,) | |
else: | |
parts = s[1:].split('.') | |
# We can't compare ints and strings on Python 3, so fudge it | |
# by zero-filling numeric values to simulate a numeric comparison
result = tuple([p.zfill(8) if p.isdigit() else p for p in parts]) | |
return result | |
m = is_semver(s) | |
if not m: | |
raise UnsupportedVersionError(s) | |
groups = m.groups() | |
major, minor, patch = [int(i) for i in groups[:3]] | |
# choose the '|' and '*' so that versions sort correctly | |
pre, build = make_tuple(groups[3], '|'), make_tuple(groups[5], '*') | |
return (major, minor, patch), pre, build | |
class SemanticVersion(Version): | |
def parse(self, s): | |
return _semantic_key(s) | |
@property | |
def is_prerelease(self): | |
return self._parts[1][0] != '|' | |
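
# Sketch (not part of distlib): semantic-version ordering -- a pre-release
# ('-' suffix) sorts before the corresponding release, while build metadata
# ('+' suffix) does not make a version a pre-release.
def _example_semver_ordering():
    assert SemanticVersion('1.0.0-alpha') < SemanticVersion('1.0.0')
    assert SemanticVersion('1.0.0-alpha').is_prerelease
    assert not SemanticVersion('1.0.0+build.5').is_prerelease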
class SemanticMatcher(Matcher): | |
version_class = SemanticVersion | |
class VersionScheme(object): | |
def __init__(self, key, matcher, suggester=None): | |
self.key = key | |
self.matcher = matcher | |
self.suggester = suggester | |
def is_valid_version(self, s): | |
try: | |
self.matcher.version_class(s) | |
result = True | |
except UnsupportedVersionError: | |
result = False | |
return result | |
def is_valid_matcher(self, s): | |
try: | |
self.matcher(s) | |
result = True | |
except UnsupportedVersionError: | |
result = False | |
return result | |
def is_valid_constraint_list(self, s): | |
""" | |
Used for processing some metadata fields | |
""" | |
return self.is_valid_matcher('dummy_name (%s)' % s) | |
def suggest(self, s): | |
if self.suggester is None: | |
result = None | |
else: | |
result = self.suggester(s) | |
return result | |
_SCHEMES = { | |
'normalized': VersionScheme(_normalized_key, NormalizedMatcher, | |
_suggest_normalized_version), | |
'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda s: s),  # identity suggester
'semantic': VersionScheme(_semantic_key, SemanticMatcher, | |
_suggest_semantic_version), | |
} | |
_SCHEMES['default'] = _SCHEMES['normalized'] | |
def get_scheme(name): | |
if name not in _SCHEMES: | |
raise ValueError('unknown scheme name: %r' % name) | |
return _SCHEMES[name] |
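
# Usage sketch (not part of distlib): picking a scheme and matching a
# requirement string against candidate versions. The requirement string below
# is made up for illustration.
def _example_matching():
    scheme = get_scheme('default')          # the PEP 440 ('normalized') scheme
    assert scheme.is_valid_version('1.4.2')
    matcher = scheme.matcher('requests (>=2.0, <3.0)')
    assert matcher.name == 'requests'
    assert matcher.match('2.18.4')
    assert not matcher.match('3.0.0')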
# -*- coding: utf-8 -*- | |
# | |
# Copyright (C) 2013-2016 Vinay Sajip. | |
# Licensed to the Python Software Foundation under a contributor agreement. | |
# See LICENSE.txt and CONTRIBUTORS.txt. | |
# | |
from __future__ import unicode_literals | |
import base64 | |
import codecs | |
import datetime | |
import distutils.util | |
from email import message_from_file | |
import hashlib | |
import imp | |
import json | |
import logging | |
import os | |
import posixpath | |
import re | |
import shutil | |
import sys | |
import tempfile | |
import zipfile | |
from . import __version__, DistlibException | |
from .compat import sysconfig, ZipFile, fsdecode, text_type, filter | |
from .database import InstalledDistribution | |
from .metadata import Metadata, METADATA_FILENAME | |
from .util import (FileOperator, convert_path, CSVReader, CSVWriter, Cache, | |
cached_property, get_cache_base, read_exports, tempdir) | |
from .version import NormalizedVersion, UnsupportedVersionError | |
logger = logging.getLogger(__name__) | |
cache = None # created when needed | |
if hasattr(sys, 'pypy_version_info'): | |
IMP_PREFIX = 'pp' | |
elif sys.platform.startswith('java'): | |
IMP_PREFIX = 'jy' | |
elif sys.platform == 'cli': | |
IMP_PREFIX = 'ip' | |
else: | |
IMP_PREFIX = 'cp' | |
VER_SUFFIX = sysconfig.get_config_var('py_version_nodot') | |
if not VER_SUFFIX: # pragma: no cover | |
VER_SUFFIX = '%s%s' % sys.version_info[:2] | |
PYVER = 'py' + VER_SUFFIX | |
IMPVER = IMP_PREFIX + VER_SUFFIX | |
ARCH = distutils.util.get_platform().replace('-', '_').replace('.', '_') | |
ABI = sysconfig.get_config_var('SOABI') | |
if ABI and ABI.startswith('cpython-'): | |
ABI = ABI.replace('cpython-', 'cp') | |
else: | |
def _derive_abi(): | |
parts = ['cp', VER_SUFFIX] | |
if sysconfig.get_config_var('Py_DEBUG'): | |
parts.append('d') | |
if sysconfig.get_config_var('WITH_PYMALLOC'): | |
parts.append('m') | |
if sysconfig.get_config_var('Py_UNICODE_SIZE') == 4: | |
parts.append('u') | |
return ''.join(parts) | |
ABI = _derive_abi() | |
del _derive_abi | |
FILENAME_RE = re.compile(r''' | |
(?P<nm>[^-]+) | |
-(?P<vn>\d+[^-]*) | |
(-(?P<bn>\d+[^-]*))? | |
-(?P<py>\w+\d+(\.\w+\d+)*) | |
-(?P<bi>\w+) | |
-(?P<ar>\w+(\.\w+)*) | |
\.whl$ | |
''', re.IGNORECASE | re.VERBOSE) | |
NAME_VERSION_RE = re.compile(r''' | |
(?P<nm>[^-]+) | |
-(?P<vn>\d+[^-]*) | |
(-(?P<bn>\d+[^-]*))?$ | |
''', re.IGNORECASE | re.VERBOSE) | |
SHEBANG_RE = re.compile(br'\s*#![^\r\n]*') | |
SHEBANG_DETAIL_RE = re.compile(br'^(\s*#!("[^"]+"|\S+))\s+(.*)$') | |
SHEBANG_PYTHON = b'#!python' | |
SHEBANG_PYTHONW = b'#!pythonw' | |
if os.sep == '/': | |
to_posix = lambda o: o | |
else: | |
to_posix = lambda o: o.replace(os.sep, '/') | |
class Mounter(object): | |
def __init__(self): | |
self.impure_wheels = {} | |
self.libs = {} | |
def add(self, pathname, extensions): | |
self.impure_wheels[pathname] = extensions | |
self.libs.update(extensions) | |
def remove(self, pathname): | |
extensions = self.impure_wheels.pop(pathname) | |
for k, v in extensions: | |
if k in self.libs: | |
del self.libs[k] | |
def find_module(self, fullname, path=None): | |
if fullname in self.libs: | |
result = self | |
else: | |
result = None | |
return result | |
def load_module(self, fullname): | |
if fullname in sys.modules: | |
result = sys.modules[fullname] | |
else: | |
if fullname not in self.libs: | |
raise ImportError('unable to find extension for %s' % fullname) | |
result = imp.load_dynamic(fullname, self.libs[fullname]) | |
result.__loader__ = self | |
parts = fullname.rsplit('.', 1) | |
if len(parts) > 1: | |
result.__package__ = parts[0] | |
return result | |
_hook = Mounter() | |
class Wheel(object): | |
""" | |
Class to build and install from Wheel files (PEP 427). | |
""" | |
wheel_version = (1, 1) | |
hash_kind = 'sha256' | |
def __init__(self, filename=None, sign=False, verify=False): | |
""" | |
Initialise an instance using a (valid) filename. | |
""" | |
self.sign = sign | |
self.should_verify = verify | |
self.buildver = '' | |
self.pyver = [PYVER] | |
self.abi = ['none'] | |
self.arch = ['any'] | |
self.dirname = os.getcwd() | |
if filename is None: | |
self.name = 'dummy' | |
self.version = '0.1' | |
self._filename = self.filename | |
else: | |
m = NAME_VERSION_RE.match(filename) | |
if m: | |
info = m.groupdict('') | |
self.name = info['nm'] | |
# Reinstate the local version separator | |
self.version = info['vn'].replace('_', '-') | |
self.buildver = info['bn'] | |
self._filename = self.filename | |
else: | |
dirname, filename = os.path.split(filename) | |
m = FILENAME_RE.match(filename) | |
if not m: | |
raise DistlibException('Invalid name or ' | |
'filename: %r' % filename) | |
if dirname: | |
self.dirname = os.path.abspath(dirname) | |
self._filename = filename | |
info = m.groupdict('') | |
self.name = info['nm'] | |
self.version = info['vn'] | |
self.buildver = info['bn'] | |
self.pyver = info['py'].split('.') | |
self.abi = info['bi'].split('.') | |
self.arch = info['ar'].split('.') | |
@property | |
def filename(self): | |
""" | |
Build and return a filename from the various components. | |
""" | |
if self.buildver: | |
buildver = '-' + self.buildver | |
else: | |
buildver = '' | |
pyver = '.'.join(self.pyver) | |
abi = '.'.join(self.abi) | |
arch = '.'.join(self.arch) | |
# replace - with _ as a local version separator | |
version = self.version.replace('-', '_') | |
return '%s-%s%s-%s-%s-%s.whl' % (self.name, version, buildver, | |
pyver, abi, arch) | |
@property | |
def exists(self): | |
path = os.path.join(self.dirname, self.filename) | |
return os.path.isfile(path) | |
@property | |
def tags(self): | |
for pyver in self.pyver: | |
for abi in self.abi: | |
for arch in self.arch: | |
yield pyver, abi, arch | |
@cached_property | |
def metadata(self): | |
pathname = os.path.join(self.dirname, self.filename) | |
name_ver = '%s-%s' % (self.name, self.version) | |
info_dir = '%s.dist-info' % name_ver | |
wrapper = codecs.getreader('utf-8') | |
with ZipFile(pathname, 'r') as zf: | |
wheel_metadata = self.get_wheel_metadata(zf) | |
wv = wheel_metadata['Wheel-Version'].split('.', 1) | |
file_version = tuple([int(i) for i in wv]) | |
if file_version < (1, 1): | |
fn = 'METADATA' | |
else: | |
fn = METADATA_FILENAME | |
try: | |
metadata_filename = posixpath.join(info_dir, fn) | |
with zf.open(metadata_filename) as bf: | |
wf = wrapper(bf) | |
result = Metadata(fileobj=wf) | |
except KeyError: | |
raise ValueError('Invalid wheel, because %s is ' | |
'missing' % fn) | |
return result | |
def get_wheel_metadata(self, zf): | |
name_ver = '%s-%s' % (self.name, self.version) | |
info_dir = '%s.dist-info' % name_ver | |
metadata_filename = posixpath.join(info_dir, 'WHEEL') | |
with zf.open(metadata_filename) as bf: | |
wf = codecs.getreader('utf-8')(bf) | |
message = message_from_file(wf) | |
return dict(message) | |
@cached_property | |
def info(self): | |
pathname = os.path.join(self.dirname, self.filename) | |
with ZipFile(pathname, 'r') as zf: | |
result = self.get_wheel_metadata(zf) | |
return result | |
def process_shebang(self, data): | |
m = SHEBANG_RE.match(data) | |
if m: | |
end = m.end() | |
shebang, data_after_shebang = data[:end], data[end:] | |
# Preserve any arguments after the interpreter | |
if b'pythonw' in shebang.lower(): | |
shebang_python = SHEBANG_PYTHONW | |
else: | |
shebang_python = SHEBANG_PYTHON | |
m = SHEBANG_DETAIL_RE.match(shebang) | |
if m: | |
args = b' ' + m.groups()[-1] | |
else: | |
args = b'' | |
shebang = shebang_python + args | |
data = shebang + data_after_shebang | |
else: | |
cr = data.find(b'\r') | |
lf = data.find(b'\n') | |
if cr < 0 or cr > lf: | |
term = b'\n' | |
else: | |
if data[cr:cr + 2] == b'\r\n': | |
term = b'\r\n' | |
else: | |
term = b'\r' | |
data = SHEBANG_PYTHON + term + data | |
return data | |
def get_hash(self, data, hash_kind=None): | |
if hash_kind is None: | |
hash_kind = self.hash_kind | |
try: | |
hasher = getattr(hashlib, hash_kind) | |
except AttributeError: | |
raise DistlibException('Unsupported hash algorithm: %r' % hash_kind) | |
result = hasher(data).digest() | |
result = base64.urlsafe_b64encode(result).rstrip(b'=').decode('ascii') | |
return hash_kind, result | |
def write_record(self, records, record_path, base): | |
records = list(records) # make a copy for sorting | |
p = to_posix(os.path.relpath(record_path, base)) | |
records.append((p, '', '')) | |
records.sort() | |
with CSVWriter(record_path) as writer: | |
for row in records: | |
writer.writerow(row) | |
def write_records(self, info, libdir, archive_paths): | |
records = [] | |
distinfo, info_dir = info | |
hasher = getattr(hashlib, self.hash_kind) | |
for ap, p in archive_paths: | |
with open(p, 'rb') as f: | |
data = f.read() | |
digest = '%s=%s' % self.get_hash(data) | |
size = os.path.getsize(p) | |
records.append((ap, digest, size)) | |
p = os.path.join(distinfo, 'RECORD') | |
self.write_record(records, p, libdir) | |
ap = to_posix(os.path.join(info_dir, 'RECORD')) | |
archive_paths.append((ap, p)) | |
def build_zip(self, pathname, archive_paths): | |
with ZipFile(pathname, 'w', zipfile.ZIP_DEFLATED) as zf: | |
for ap, p in archive_paths: | |
logger.debug('Wrote %s to %s in wheel', p, ap) | |
zf.write(p, ap) | |
def build(self, paths, tags=None, wheel_version=None): | |
""" | |
Build a wheel from files in specified paths, and use any specified tags | |
when determining the name of the wheel. | |
""" | |
if tags is None: | |
tags = {} | |
libkey = list(filter(lambda o: o in paths, ('purelib', 'platlib')))[0] | |
if libkey == 'platlib': | |
is_pure = 'false' | |
default_pyver = [IMPVER] | |
default_abi = [ABI] | |
default_arch = [ARCH] | |
else: | |
is_pure = 'true' | |
default_pyver = [PYVER] | |
default_abi = ['none'] | |
default_arch = ['any'] | |
self.pyver = tags.get('pyver', default_pyver) | |
self.abi = tags.get('abi', default_abi) | |
self.arch = tags.get('arch', default_arch) | |
libdir = paths[libkey] | |
name_ver = '%s-%s' % (self.name, self.version) | |
data_dir = '%s.data' % name_ver | |
info_dir = '%s.dist-info' % name_ver | |
archive_paths = [] | |
# First, stuff which is not in site-packages | |
for key in ('data', 'headers', 'scripts'): | |
if key not in paths: | |
continue | |
path = paths[key] | |
if os.path.isdir(path): | |
for root, dirs, files in os.walk(path): | |
for fn in files: | |
p = fsdecode(os.path.join(root, fn)) | |
rp = os.path.relpath(p, path) | |
ap = to_posix(os.path.join(data_dir, key, rp)) | |
archive_paths.append((ap, p)) | |
if key == 'scripts' and not p.endswith('.exe'): | |
with open(p, 'rb') as f: | |
data = f.read() | |
data = self.process_shebang(data) | |
with open(p, 'wb') as f: | |
f.write(data) | |
# Now, stuff which is in site-packages, other than the | |
# distinfo stuff. | |
path = libdir | |
distinfo = None | |
for root, dirs, files in os.walk(path): | |
if root == path: | |
# At the top level only, save distinfo for later | |
# and skip it for now | |
for i, dn in enumerate(dirs): | |
dn = fsdecode(dn) | |
if dn.endswith('.dist-info'): | |
distinfo = os.path.join(root, dn) | |
del dirs[i] | |
break | |
assert distinfo, '.dist-info directory expected, not found' | |
for fn in files: | |
# comment out next suite to leave .pyc files in | |
if fsdecode(fn).endswith(('.pyc', '.pyo')): | |
continue | |
p = os.path.join(root, fn) | |
rp = to_posix(os.path.relpath(p, path)) | |
archive_paths.append((rp, p)) | |
# Now distinfo. Assumed to be flat, i.e. os.listdir is enough. | |
files = os.listdir(distinfo) | |
for fn in files: | |
if fn not in ('RECORD', 'INSTALLER', 'SHARED', 'WHEEL'): | |
p = fsdecode(os.path.join(distinfo, fn)) | |
ap = to_posix(os.path.join(info_dir, fn)) | |
archive_paths.append((ap, p)) | |
wheel_metadata = [ | |
'Wheel-Version: %d.%d' % (wheel_version or self.wheel_version), | |
'Generator: distlib %s' % __version__, | |
'Root-Is-Purelib: %s' % is_pure, | |
] | |
for pyver, abi, arch in self.tags: | |
wheel_metadata.append('Tag: %s-%s-%s' % (pyver, abi, arch)) | |
p = os.path.join(distinfo, 'WHEEL') | |
with open(p, 'w') as f: | |
f.write('\n'.join(wheel_metadata)) | |
ap = to_posix(os.path.join(info_dir, 'WHEEL')) | |
archive_paths.append((ap, p)) | |
# Now, at last, RECORD. | |
# Paths in here are archive paths - nothing else makes sense. | |
self.write_records((distinfo, info_dir), libdir, archive_paths) | |
# Now, ready to build the zip file | |
pathname = os.path.join(self.dirname, self.filename) | |
self.build_zip(pathname, archive_paths) | |
return pathname | |
def install(self, paths, maker, **kwargs): | |
""" | |
Install a wheel to the specified paths. If kwarg ``warner`` is | |
specified, it should be a callable, which will be called with two | |
tuples indicating the wheel version of this software and the wheel | |
version in the file, if there is a discrepancy in the versions. | |
This can be used to issue any warnings or to raise any exceptions.
If kwarg ``lib_only`` is True, only the purelib/platlib files are | |
installed, and the headers, scripts, data and dist-info metadata are | |
not written. | |
The return value is a :class:`InstalledDistribution` instance unless | |
``lib_only`` is True, in which case the return value is ``None``.
""" | |
dry_run = maker.dry_run | |
warner = kwargs.get('warner') | |
lib_only = kwargs.get('lib_only', False) | |
pathname = os.path.join(self.dirname, self.filename) | |
name_ver = '%s-%s' % (self.name, self.version) | |
data_dir = '%s.data' % name_ver | |
info_dir = '%s.dist-info' % name_ver | |
metadata_name = posixpath.join(info_dir, METADATA_FILENAME) | |
wheel_metadata_name = posixpath.join(info_dir, 'WHEEL') | |
record_name = posixpath.join(info_dir, 'RECORD') | |
wrapper = codecs.getreader('utf-8') | |
with ZipFile(pathname, 'r') as zf: | |
with zf.open(wheel_metadata_name) as bwf: | |
wf = wrapper(bwf) | |
message = message_from_file(wf) | |
wv = message['Wheel-Version'].split('.', 1) | |
file_version = tuple([int(i) for i in wv]) | |
if (file_version != self.wheel_version) and warner: | |
warner(self.wheel_version, file_version) | |
if message['Root-Is-Purelib'] == 'true': | |
libdir = paths['purelib'] | |
else: | |
libdir = paths['platlib'] | |
records = {} | |
with zf.open(record_name) as bf: | |
with CSVReader(stream=bf) as reader: | |
for row in reader: | |
p = row[0] | |
records[p] = row | |
data_pfx = posixpath.join(data_dir, '') | |
info_pfx = posixpath.join(info_dir, '') | |
script_pfx = posixpath.join(data_dir, 'scripts', '') | |
# make a new instance rather than a copy of maker's, | |
# as we mutate it | |
fileop = FileOperator(dry_run=dry_run) | |
fileop.record = True # so we can rollback if needed | |
bc = not sys.dont_write_bytecode # Double negatives. Lovely! | |
outfiles = [] # for RECORD writing | |
# for script copying/shebang processing | |
workdir = tempfile.mkdtemp() | |
# set target dir later | |
# we default add_launchers to False, as the | |
# Python Launcher should be used instead | |
maker.source_dir = workdir | |
maker.target_dir = None | |
try: | |
for zinfo in zf.infolist(): | |
arcname = zinfo.filename | |
if isinstance(arcname, text_type): | |
u_arcname = arcname | |
else: | |
u_arcname = arcname.decode('utf-8') | |
# The signature file won't be in RECORD, | |
# and we don't currently do anything with it
if u_arcname.endswith('/RECORD.jws'): | |
continue | |
row = records[u_arcname] | |
if row[2] and str(zinfo.file_size) != row[2]: | |
raise DistlibException('size mismatch for ' | |
'%s' % u_arcname) | |
if row[1]: | |
kind, value = row[1].split('=', 1) | |
with zf.open(arcname) as bf: | |
data = bf.read() | |
_, digest = self.get_hash(data, kind) | |
if digest != value: | |
raise DistlibException('digest mismatch for ' | |
'%s' % arcname) | |
if lib_only and u_arcname.startswith((info_pfx, data_pfx)): | |
logger.debug('lib_only: skipping %s', u_arcname) | |
continue | |
is_script = (u_arcname.startswith(script_pfx) | |
and not u_arcname.endswith('.exe')) | |
if u_arcname.startswith(data_pfx): | |
_, where, rp = u_arcname.split('/', 2) | |
outfile = os.path.join(paths[where], convert_path(rp)) | |
else: | |
# meant for site-packages. | |
if u_arcname in (wheel_metadata_name, record_name): | |
continue | |
outfile = os.path.join(libdir, convert_path(u_arcname)) | |
if not is_script: | |
with zf.open(arcname) as bf: | |
fileop.copy_stream(bf, outfile) | |
outfiles.append(outfile) | |
# Double check the digest of the written file | |
if not dry_run and row[1]: | |
with open(outfile, 'rb') as bf: | |
data = bf.read() | |
_, newdigest = self.get_hash(data, kind) | |
if newdigest != digest: | |
raise DistlibException('digest mismatch ' | |
'on write for ' | |
'%s' % outfile) | |
if bc and outfile.endswith('.py'): | |
try: | |
pyc = fileop.byte_compile(outfile) | |
outfiles.append(pyc) | |
except Exception: | |
# Don't give up if byte-compilation fails, | |
# but log it and perhaps warn the user | |
logger.warning('Byte-compilation failed', | |
exc_info=True) | |
else: | |
fn = os.path.basename(convert_path(arcname)) | |
workname = os.path.join(workdir, fn) | |
with zf.open(arcname) as bf: | |
fileop.copy_stream(bf, workname) | |
dn, fn = os.path.split(outfile) | |
maker.target_dir = dn | |
filenames = maker.make(fn) | |
fileop.set_executable_mode(filenames) | |
outfiles.extend(filenames) | |
if lib_only: | |
logger.debug('lib_only: returning None') | |
dist = None | |
else: | |
# Generate scripts | |
# Try to get pydist.json so we can see if there are | |
# any commands to generate. If this fails (e.g. because | |
# of a legacy wheel), log a warning but don't give up. | |
commands = None | |
file_version = self.info['Wheel-Version'] | |
if file_version == '1.0': | |
# Use legacy info | |
ep = posixpath.join(info_dir, 'entry_points.txt') | |
try: | |
with zf.open(ep) as bwf: | |
epdata = read_exports(bwf) | |
commands = {} | |
for key in ('console', 'gui'): | |
k = '%s_scripts' % key | |
if k in epdata: | |
commands['wrap_%s' % key] = d = {} | |
for v in epdata[k].values(): | |
s = '%s:%s' % (v.prefix, v.suffix) | |
if v.flags: | |
s += ' %s' % v.flags | |
d[v.name] = s | |
except Exception: | |
logger.warning('Unable to read legacy script ' | |
'metadata, so cannot generate ' | |
'scripts') | |
else: | |
try: | |
with zf.open(metadata_name) as bwf: | |
wf = wrapper(bwf) | |
commands = json.load(wf).get('extensions') | |
if commands: | |
commands = commands.get('python.commands') | |
except Exception: | |
logger.warning('Unable to read JSON metadata, so ' | |
'cannot generate scripts') | |
if commands: | |
console_scripts = commands.get('wrap_console', {}) | |
gui_scripts = commands.get('wrap_gui', {}) | |
if console_scripts or gui_scripts: | |
script_dir = paths.get('scripts', '') | |
if not os.path.isdir(script_dir): | |
raise ValueError('Valid script path not ' | |
'specified') | |
maker.target_dir = script_dir | |
for k, v in console_scripts.items(): | |
script = '%s = %s' % (k, v) | |
filenames = maker.make(script) | |
fileop.set_executable_mode(filenames) | |
if gui_scripts: | |
options = {'gui': True } | |
for k, v in gui_scripts.items(): | |
script = '%s = %s' % (k, v) | |
filenames = maker.make(script, options) | |
fileop.set_executable_mode(filenames) | |
p = os.path.join(libdir, info_dir) | |
dist = InstalledDistribution(p) | |
# Write SHARED | |
paths = dict(paths) # don't change passed in dict | |
del paths['purelib'] | |
del paths['platlib'] | |
paths['lib'] = libdir | |
p = dist.write_shared_locations(paths, dry_run) | |
if p: | |
outfiles.append(p) | |
# Write RECORD | |
dist.write_installed_files(outfiles, paths['prefix'], | |
dry_run) | |
return dist | |
except Exception: # pragma: no cover | |
logger.exception('installation failed.') | |
fileop.rollback() | |
raise | |
finally: | |
shutil.rmtree(workdir) | |
def _get_dylib_cache(self): | |
global cache | |
if cache is None: | |
# Use native string to avoid issues on 2.x: see Python #20140. | |
base = os.path.join(get_cache_base(), str('dylib-cache'), | |
sys.version[:3]) | |
cache = Cache(base) | |
return cache | |
def _get_extensions(self): | |
pathname = os.path.join(self.dirname, self.filename) | |
name_ver = '%s-%s' % (self.name, self.version) | |
info_dir = '%s.dist-info' % name_ver | |
arcname = posixpath.join(info_dir, 'EXTENSIONS') | |
wrapper = codecs.getreader('utf-8') | |
result = [] | |
with ZipFile(pathname, 'r') as zf: | |
try: | |
with zf.open(arcname) as bf: | |
wf = wrapper(bf) | |
extensions = json.load(wf) | |
cache = self._get_dylib_cache() | |
prefix = cache.prefix_to_dir(pathname) | |
cache_base = os.path.join(cache.base, prefix) | |
if not os.path.isdir(cache_base): | |
os.makedirs(cache_base) | |
for name, relpath in extensions.items(): | |
dest = os.path.join(cache_base, convert_path(relpath)) | |
if not os.path.exists(dest): | |
extract = True | |
else: | |
file_time = os.stat(dest).st_mtime | |
file_time = datetime.datetime.fromtimestamp(file_time) | |
info = zf.getinfo(relpath) | |
wheel_time = datetime.datetime(*info.date_time) | |
extract = wheel_time > file_time | |
if extract: | |
zf.extract(relpath, cache_base) | |
result.append((name, dest)) | |
except KeyError: | |
pass | |
return result | |
def is_compatible(self): | |
""" | |
Determine if a wheel is compatible with the running system. | |
""" | |
return is_compatible(self) | |
def is_mountable(self): | |
""" | |
Determine if a wheel is asserted as mountable by its metadata. | |
""" | |
return True # for now - metadata details TBD | |
def mount(self, append=False): | |
pathname = os.path.abspath(os.path.join(self.dirname, self.filename)) | |
if not self.is_compatible(): | |
msg = 'Wheel %s not compatible with this Python.' % pathname | |
raise DistlibException(msg) | |
if not self.is_mountable(): | |
msg = 'Wheel %s is marked as not mountable.' % pathname | |
raise DistlibException(msg) | |
if pathname in sys.path: | |
logger.debug('%s already in path', pathname) | |
else: | |
if append: | |
sys.path.append(pathname) | |
else: | |
sys.path.insert(0, pathname) | |
extensions = self._get_extensions() | |
if extensions: | |
if _hook not in sys.meta_path: | |
sys.meta_path.append(_hook) | |
_hook.add(pathname, extensions) | |
def unmount(self): | |
pathname = os.path.abspath(os.path.join(self.dirname, self.filename)) | |
if pathname not in sys.path: | |
logger.debug('%s not in path', pathname) | |
else: | |
sys.path.remove(pathname) | |
if pathname in _hook.impure_wheels: | |
_hook.remove(pathname) | |
if not _hook.impure_wheels: | |
if _hook in sys.meta_path: | |
sys.meta_path.remove(_hook) | |
def verify(self): | |
pathname = os.path.join(self.dirname, self.filename) | |
name_ver = '%s-%s' % (self.name, self.version) | |
data_dir = '%s.data' % name_ver | |
info_dir = '%s.dist-info' % name_ver | |
metadata_name = posixpath.join(info_dir, METADATA_FILENAME) | |
wheel_metadata_name = posixpath.join(info_dir, 'WHEEL') | |
record_name = posixpath.join(info_dir, 'RECORD') | |
wrapper = codecs.getreader('utf-8') | |
with ZipFile(pathname, 'r') as zf: | |
with zf.open(wheel_metadata_name) as bwf: | |
wf = wrapper(bwf) | |
message = message_from_file(wf) | |
wv = message['Wheel-Version'].split('.', 1) | |
file_version = tuple([int(i) for i in wv]) | |
# TODO version verification | |
records = {} | |
with zf.open(record_name) as bf: | |
with CSVReader(stream=bf) as reader: | |
for row in reader: | |
p = row[0] | |
records[p] = row | |
for zinfo in zf.infolist(): | |
arcname = zinfo.filename | |
if isinstance(arcname, text_type): | |
u_arcname = arcname | |
else: | |
u_arcname = arcname.decode('utf-8') | |
if '..' in u_arcname: | |
raise DistlibException('invalid entry in ' | |
'wheel: %r' % u_arcname) | |
# The signature file won't be in RECORD, | |
# and we don't currently do anything with it
if u_arcname.endswith('/RECORD.jws'): | |
continue | |
row = records[u_arcname] | |
if row[2] and str(zinfo.file_size) != row[2]: | |
raise DistlibException('size mismatch for ' | |
'%s' % u_arcname) | |
if row[1]: | |
kind, value = row[1].split('=', 1) | |
with zf.open(arcname) as bf: | |
data = bf.read() | |
_, digest = self.get_hash(data, kind) | |
if digest != value: | |
raise DistlibException('digest mismatch for ' | |
'%s' % arcname) | |
def update(self, modifier, dest_dir=None, **kwargs): | |
""" | |
Update the contents of a wheel in a generic way. The modifier should | |
be a callable which expects a dictionary argument: its keys are | |
archive-entry paths, and its values are absolute filesystem paths | |
where the contents of the corresponding archive entries can be found. The
modifier is free to change the contents of the files pointed to, add | |
new entries and remove entries, before returning. This method will | |
extract the entire contents of the wheel to a temporary location, call | |
the modifier, and then use the passed (and possibly updated) | |
dictionary to write a new wheel. If ``dest_dir`` is specified, the new | |
wheel is written there -- otherwise, the original wheel is overwritten. | |
The modifier should return True if it updated the wheel, else False. | |
This method returns the same value the modifier returns. | |
""" | |
def get_version(path_map, info_dir): | |
version = path = None | |
key = '%s/%s' % (info_dir, METADATA_FILENAME) | |
if key not in path_map: | |
key = '%s/PKG-INFO' % info_dir | |
if key in path_map: | |
path = path_map[key] | |
version = Metadata(path=path).version | |
return version, path | |
def update_version(version, path): | |
updated = None | |
try: | |
v = NormalizedVersion(version)  # validation only; failure is handled below
i = version.find('-') | |
if i < 0: | |
updated = '%s+1' % version | |
else: | |
parts = [int(s) for s in version[i + 1:].split('.')] | |
parts[-1] += 1 | |
updated = '%s+%s' % (version[:i], | |
'.'.join(str(i) for i in parts)) | |
except UnsupportedVersionError: | |
logger.debug('Cannot update non-compliant (PEP-440) ' | |
'version %r', version) | |
if updated: | |
md = Metadata(path=path) | |
md.version = updated | |
legacy = not path.endswith(METADATA_FILENAME) | |
md.write(path=path, legacy=legacy) | |
logger.debug('Version updated from %r to %r', version, | |
updated) | |
pathname = os.path.join(self.dirname, self.filename) | |
name_ver = '%s-%s' % (self.name, self.version) | |
info_dir = '%s.dist-info' % name_ver | |
record_name = posixpath.join(info_dir, 'RECORD') | |
with tempdir() as workdir: | |
with ZipFile(pathname, 'r') as zf: | |
path_map = {} | |
for zinfo in zf.infolist(): | |
arcname = zinfo.filename | |
if isinstance(arcname, text_type): | |
u_arcname = arcname | |
else: | |
u_arcname = arcname.decode('utf-8') | |
if u_arcname == record_name: | |
continue | |
if '..' in u_arcname: | |
raise DistlibException('invalid entry in ' | |
'wheel: %r' % u_arcname) | |
zf.extract(zinfo, workdir) | |
path = os.path.join(workdir, convert_path(u_arcname)) | |
path_map[u_arcname] = path | |
# Remember the version. | |
original_version, _ = get_version(path_map, info_dir) | |
# Files extracted. Call the modifier. | |
modified = modifier(path_map, **kwargs) | |
if modified: | |
# Something changed - need to build a new wheel. | |
current_version, path = get_version(path_map, info_dir) | |
if current_version and (current_version == original_version): | |
# Add or update local version to signify changes. | |
update_version(current_version, path) | |
# Decide where the new wheel goes. | |
if dest_dir is None: | |
fd, newpath = tempfile.mkstemp(suffix='.whl', | |
prefix='wheel-update-', | |
dir=workdir) | |
os.close(fd) | |
else: | |
if not os.path.isdir(dest_dir): | |
raise DistlibException('Not a directory: %r' % dest_dir) | |
newpath = os.path.join(dest_dir, self.filename) | |
archive_paths = list(path_map.items()) | |
distinfo = os.path.join(workdir, info_dir) | |
info = distinfo, info_dir | |
self.write_records(info, workdir, archive_paths) | |
self.build_zip(newpath, archive_paths) | |
if dest_dir is None: | |
shutil.copyfile(newpath, pathname) | |
return modified | |
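
# Sketch (not part of distlib): how a PEP 427 filename decomposes into the
# attributes used throughout this class. The filename below is made up.
def _example_wheel_fields():
    w = Wheel('demo_pkg-1.2.0-cp36-cp36m-manylinux1_x86_64.whl')
    assert (w.name, w.version) == ('demo_pkg', '1.2.0')
    assert w.pyver == ['cp36'] and w.abi == ['cp36m']
    assert w.arch == ['manylinux1_x86_64']
    assert w.filename == 'demo_pkg-1.2.0-cp36-cp36m-manylinux1_x86_64.whl'
    return list(w.tags)   # [('cp36', 'cp36m', 'manylinux1_x86_64')]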
def compatible_tags(): | |
""" | |
Return (pyver, abi, arch) tuples compatible with this Python. | |
""" | |
versions = [VER_SUFFIX] | |
major = VER_SUFFIX[0] | |
for minor in range(sys.version_info[1] - 1, -1, -1):
versions.append(''.join([major, str(minor)])) | |
abis = [] | |
for suffix, _, _ in imp.get_suffixes(): | |
if suffix.startswith('.abi'): | |
abis.append(suffix.split('.', 2)[1]) | |
abis.sort() | |
if ABI != 'none': | |
abis.insert(0, ABI) | |
abis.append('none') | |
result = [] | |
arches = [ARCH] | |
if sys.platform == 'darwin': | |
m = re.match(r'(\w+)_(\d+)_(\d+)_(\w+)$', ARCH)
if m: | |
name, major, minor, arch = m.groups() | |
minor = int(minor) | |
matches = [arch] | |
if arch in ('i386', 'ppc'): | |
matches.append('fat') | |
if arch in ('i386', 'ppc', 'x86_64'): | |
matches.append('fat3') | |
if arch in ('ppc64', 'x86_64'): | |
matches.append('fat64') | |
if arch in ('i386', 'x86_64'): | |
matches.append('intel') | |
if arch in ('i386', 'x86_64', 'intel', 'ppc', 'ppc64'): | |
matches.append('universal') | |
while minor >= 0: | |
for match in matches: | |
s = '%s_%s_%s_%s' % (name, major, minor, match) | |
if s != ARCH: # already there | |
arches.append(s) | |
minor -= 1 | |
# Most specific - our Python version, ABI and arch | |
for abi in abis: | |
for arch in arches: | |
result.append((''.join((IMP_PREFIX, versions[0])), abi, arch)) | |
# where no ABI / arch dependency, but IMP_PREFIX dependency | |
for i, version in enumerate(versions): | |
result.append((''.join((IMP_PREFIX, version)), 'none', 'any')) | |
if i == 0: | |
result.append((''.join((IMP_PREFIX, version[0])), 'none', 'any')) | |
# no IMP_PREFIX, ABI or arch dependency | |
for i, version in enumerate(versions): | |
result.append((''.join(('py', version)), 'none', 'any')) | |
if i == 0: | |
result.append((''.join(('py', version[0])), 'none', 'any')) | |
return set(result) | |
COMPATIBLE_TAGS = compatible_tags() | |
del compatible_tags | |
def is_compatible(wheel, tags=None): | |
if not isinstance(wheel, Wheel): | |
wheel = Wheel(wheel) # assume it's a filename | |
result = False | |
if tags is None: | |
tags = COMPATIBLE_TAGS | |
for ver, abi, arch in tags: | |
if ver in wheel.pyver and abi in wheel.abi and arch in wheel.arch: | |
result = True | |
break | |
return result |
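
# Sketch (not part of distlib): compatibility is simple membership of one of
# the supplied (pyver, abi, arch) tags in the wheel's own tag sets. The tags
# below are supplied explicitly instead of the auto-detected COMPATIBLE_TAGS,
# and the filenames are made up for illustration.
def _example_compatibility():
    tags = set([('py3', 'none', 'any'), ('cp36', 'cp36m', 'manylinux1_x86_64')])
    assert is_compatible('demo_pkg-1.2.0-py2.py3-none-any.whl', tags=tags)
    assert not is_compatible('demo_pkg-1.2.0-cp27-cp27mu-manylinux1_x86_64.whl',
                             tags=tags)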
# Copyright 2015,2016 Nir Cohen | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
""" | |
The ``distro`` package (``distro`` stands for Linux Distribution) provides | |
information about the Linux distribution it runs on, such as a reliable | |
machine-readable distro ID, or version information. | |
It is a renewed alternative implementation for Python's original | |
:py:func:`platform.linux_distribution` function, but it provides much more | |
functionality. An alternative implementation became necessary because Python | |
3.5 deprecated this function, and Python 3.7 is expected to remove it | |
altogether. Its predecessor function :py:func:`platform.dist` has been
deprecated since Python 2.6 and is also expected to be removed in Python 3.7. | |
Still, there are many cases in which access to Linux distribution information | |
is needed. See `Python issue 1322 <https://bugs.python.org/issue1322>`_ for | |
more information. | |
""" | |
import os | |
import re | |
import sys | |
import json | |
import shlex | |
import logging | |
import subprocess | |
if not sys.platform.startswith('linux'): | |
raise ImportError('Unsupported platform: {0}'.format(sys.platform)) | |
_UNIXCONFDIR = '/etc' | |
_OS_RELEASE_BASENAME = 'os-release' | |
#: Translation table for normalizing the "ID" attribute defined in os-release | |
#: files, for use by the :func:`distro.id` method. | |
#: | |
#: * Key: Value as defined in the os-release file, translated to lower case, | |
#: with blanks translated to underscores. | |
#: | |
#: * Value: Normalized value. | |
NORMALIZED_OS_ID = {} | |
#: Translation table for normalizing the "Distributor ID" attribute returned by | |
#: the lsb_release command, for use by the :func:`distro.id` method. | |
#: | |
#: * Key: Value as returned by the lsb_release command, translated to lower | |
#: case, with blanks translated to underscores. | |
#: | |
#: * Value: Normalized value. | |
NORMALIZED_LSB_ID = { | |
'enterpriseenterprise': 'oracle', # Oracle Enterprise Linux | |
'redhatenterpriseworkstation': 'rhel', # RHEL 6.7 | |
} | |
#: Translation table for normalizing the distro ID derived from the file name | |
#: of distro release files, for use by the :func:`distro.id` method. | |
#: | |
#: * Key: Value as derived from the file name of a distro release file, | |
#: translated to lower case, with blanks translated to underscores. | |
#: | |
#: * Value: Normalized value. | |
NORMALIZED_DISTRO_ID = { | |
'redhat': 'rhel', # RHEL 6.x, 7.x | |
} | |
# Pattern for content of distro release file (reversed) | |
_DISTRO_RELEASE_CONTENT_REVERSED_PATTERN = re.compile( | |
r'(?:[^)]*\)(.*)\()? *(?:STL )?([\d.+\-a-z]*\d) *(?:esaeler *)?(.+)') | |
# Pattern for base file name of distro release file | |
_DISTRO_RELEASE_BASENAME_PATTERN = re.compile( | |
r'(\w+)[-_](release|version)$') | |
# Base file names to be ignored when searching for distro release file | |
_DISTRO_RELEASE_IGNORE_BASENAMES = ( | |
'debian_version', | |
'lsb-release', | |
'oem-release', | |
_OS_RELEASE_BASENAME, | |
'system-release' | |
) | |
def linux_distribution(full_distribution_name=True): | |
""" | |
Return information about the current Linux distribution as a tuple | |
``(id_name, version, codename)`` with items as follows: | |
* ``id_name``: If *full_distribution_name* is false, the result of | |
:func:`distro.id`. Otherwise, the result of :func:`distro.name`. | |
* ``version``: The result of :func:`distro.version`. | |
* ``codename``: The result of :func:`distro.codename`. | |
The interface of this function is compatible with the original | |
:py:func:`platform.linux_distribution` function, supporting a subset of | |
its parameters. | |
The data it returns may not exactly be the same, because it uses more data | |
sources than the original function, and that may lead to different data if | |
the Linux distribution is not consistent across multiple data sources it | |
provides (there are indeed such distributions ...). | |
Another reason for differences is the fact that the :func:`distro.id` | |
method normalizes the distro ID string to a reliable machine-readable value | |
for a number of popular Linux distributions. | |
""" | |
return _distro.linux_distribution(full_distribution_name) | |
def id(): | |
""" | |
Return the distro ID of the current Linux distribution, as a | |
machine-readable string. | |
For a number of Linux distributions, the returned distro ID value is | |
*reliable*, in the sense that it is documented and that it does not change | |
across releases of the distribution. | |
This package maintains the following reliable distro ID values: | |
============== ========================================= | |
Distro ID Distribution | |
============== ========================================= | |
"ubuntu" Ubuntu | |
"debian" Debian | |
"rhel" RedHat Enterprise Linux | |
"centos" CentOS | |
"fedora" Fedora | |
"sles" SUSE Linux Enterprise Server | |
"opensuse" openSUSE | |
"amazon" Amazon Linux | |
"arch" Arch Linux | |
"cloudlinux" CloudLinux OS | |
"exherbo" Exherbo Linux | |
"gentoo" GenToo Linux | |
"ibm_powerkvm" IBM PowerKVM | |
"kvmibm" KVM for IBM z Systems | |
"linuxmint" Linux Mint | |
"mageia" Mageia | |
"mandriva" Mandriva Linux | |
"parallels" Parallels | |
"pidora" Pidora | |
"raspbian" Raspbian | |
"oracle" Oracle Linux (and Oracle Enterprise Linux) | |
"scientific" Scientific Linux | |
"slackware" Slackware | |
"xenserver" XenServer | |
============== ========================================= | |
If you need a reliable ID for another distro added to this set, or if you | |
find that the :func:`distro.id` function returns a different distro ID for | |
one of the listed distros, please create an issue in the | |
`distro issue tracker`_. | |
**Lookup hierarchy and transformations:** | |
First, the ID is obtained from the following sources, in the specified | |
order. The first available and non-empty value is used: | |
* the value of the "ID" attribute of the os-release file, | |
* the value of the "Distributor ID" attribute returned by the lsb_release | |
command, | |
* the first part of the file name of the distro release file, | |
The ID value determined this way then passes through the following | |
transformations before it is returned by this method: | |
* it is translated to lower case, | |
* blanks (which should not be there anyway) are translated to underscores, | |
* a normalization of the ID is performed, based upon | |
`normalization tables`_. The purpose of this normalization is to ensure | |
that the ID is as reliable as possible, even across incompatible changes | |
in the Linux distributions. A common reason for an incompatible change is | |
the addition of an os-release file, or the addition of the lsb_release | |
command, with ID values that differ from what was previously determined | |
from the distro release file name. | |
""" | |
return _distro.id() | |
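# A small sketch of the transformations described above (lower-casing,
# blank-to-underscore translation, then a normalization-table lookup); the
# table used here is NORMALIZED_LSB_ID from this module and the input value
# is hypothetical:
#
#     raw_id = 'RedHatEnterpriseWorkstation'   # e.g. from lsb_release
#     key = raw_id.lower().replace(' ', '_')   # 'redhatenterpriseworkstation'
#     NORMALIZED_LSB_ID.get(key, key)          # -> 'rhel'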
def name(pretty=False): | |
""" | |
Return the name of the current Linux distribution, as a human-readable | |
string. | |
If *pretty* is false, the name is returned without version or codename. | |
(e.g. "CentOS Linux") | |
If *pretty* is true, the version and codename are appended. | |
(e.g. "CentOS Linux 7.1.1503 (Core)") | |
**Lookup hierarchy:** | |
The name is obtained from the following sources, in the specified order. | |
The first available and non-empty value is used: | |
* If *pretty* is false: | |
- the value of the "NAME" attribute of the os-release file, | |
- the value of the "Distributor ID" attribute returned by the lsb_release | |
command, | |
- the value of the "<name>" field of the distro release file. | |
* If *pretty* is true: | |
- the value of the "PRETTY_NAME" attribute of the os-release file, | |
- the value of the "Description" attribute returned by the lsb_release | |
command, | |
- the value of the "<name>" field of the distro release file, appended | |
with the value of the pretty version ("<version_id>" and "<codename>" | |
fields) of the distro release file, if available. | |
""" | |
return _distro.name(pretty) | |
def version(pretty=False, best=False): | |
""" | |
Return the version of the current Linux distribution, as a human-readable | |
string. | |
If *pretty* is false, the version is returned without codename (e.g. | |
"7.0"). | |
If *pretty* is true, the codename in parenthesis is appended, if the | |
codename is non-empty (e.g. "7.0 (Maipo)"). | |
Some distributions provide version numbers with different precisions in | |
the different sources of distribution information. Examining the different | |
sources in a fixed priority order does not always yield the most precise | |
version (e.g. for Debian 8.2, or CentOS 7.1). | |
The *best* parameter can be used to control the approach for the returned | |
version: | |
If *best* is false, the first non-empty version number in priority order of | |
the examined sources is returned. | |
If *best* is true, the most precise version number out of all examined | |
sources is returned. | |
**Lookup hierarchy:** | |
In all cases, the version number is obtained from the following sources. | |
If *best* is false, this order represents the priority order: | |
* the value of the "VERSION_ID" attribute of the os-release file, | |
* the value of the "Release" attribute returned by the lsb_release | |
command, | |
* the version number parsed from the "<version_id>" field of the first line | |
of the distro release file, | |
* the version number parsed from the "PRETTY_NAME" attribute of the | |
os-release file, if it follows the format of the distro release files. | |
* the version number parsed from the "Description" attribute returned by | |
the lsb_release command, if it follows the format of the distro release | |
files. | |
""" | |
return _distro.version(pretty, best) | |
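# Illustration of the *best* parameter (hypothetical values; a CentOS 7.1
# system might expose '7' in os-release but '7.1.1503' in its distro release
# file):
#
#     distro.version()             # e.g. '7'        (first non-empty source)
#     distro.version(best=True)    # e.g. '7.1.1503' (most precise source)
#     distro.version(pretty=True)  # e.g. '7 (Core)'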
def version_parts(best=False): | |
""" | |
Return the version of the current Linux distribution as a tuple | |
``(major, minor, build_number)`` with items as follows: | |
* ``major``: The result of :func:`distro.major_version`. | |
* ``minor``: The result of :func:`distro.minor_version`. | |
* ``build_number``: The result of :func:`distro.build_number`. | |
For a description of the *best* parameter, see the :func:`distro.version` | |
method. | |
""" | |
return _distro.version_parts(best) | |
def major_version(best=False): | |
""" | |
Return the major version of the current Linux distribution, as a string, | |
if provided. | |
Otherwise, the empty string is returned. The major version is the first | |
part of the dot-separated version string. | |
For a description of the *best* parameter, see the :func:`distro.version` | |
method. | |
""" | |
return _distro.major_version(best) | |
def minor_version(best=False): | |
""" | |
Return the minor version of the current Linux distribution, as a string, | |
if provided. | |
Otherwise, the empty string is returned. The minor version is the second | |
part of the dot-separated version string. | |
For a description of the *best* parameter, see the :func:`distro.version` | |
method. | |
""" | |
return _distro.minor_version(best) | |
def build_number(best=False): | |
""" | |
Return the build number of the current Linux distribution, as a string, | |
if provided. | |
Otherwise, the empty string is returned. The build number is the third part | |
of the dot-separated version string. | |
For a description of the *best* parameter, see the :func:`distro.version` | |
method. | |
""" | |
return _distro.build_number(best) | |
def like(): | |
""" | |
Return a space-separated list of distro IDs of distributions that are | |
closely related to the current Linux distribution with regard to packaging | |
and programming interfaces, for example distributions from which the current | |
distribution is derived. | |
**Lookup hierarchy:** | |
This information item is only provided by the os-release file. | |
For details, see the description of the "ID_LIKE" attribute in the | |
`os-release man page | |
<http://www.freedesktop.org/software/systemd/man/os-release.html>`_. | |
""" | |
return _distro.like() | |
def codename(): | |
""" | |
Return the codename for the release of the current Linux distribution, | |
as a string. | |
If the distribution does not have a codename, an empty string is returned. | |
Note that the returned codename is not always really a codename. For | |
example, openSUSE returns "x86_64". This function does not handle such | |
cases in any special way and just returns the string it finds, if any. | |
**Lookup hierarchy:** | |
* the codename within the "VERSION" attribute of the os-release file, if | |
provided, | |
* the value of the "Codename" attribute returned by the lsb_release | |
command, | |
* the value of the "<codename>" field of the distro release file. | |
""" | |
return _distro.codename() | |
def info(pretty=False, best=False): | |
""" | |
Return certain machine-readable information items about the current Linux | |
distribution in a dictionary, as shown in the following example: | |
.. sourcecode:: python | |
{ | |
'id': 'rhel', | |
'version': '7.0', | |
'version_parts': { | |
'major': '7', | |
'minor': '0', | |
'build_number': '' | |
}, | |
'like': 'fedora', | |
'codename': 'Maipo' | |
} | |
The dictionary structure and keys are always the same, regardless of which | |
information items are available in the underlying data sources. The values | |
for the various keys are as follows: | |
* ``id``: The result of :func:`distro.id`. | |
* ``version``: The result of :func:`distro.version`. | |
* ``version_parts -> major``: The result of :func:`distro.major_version`. | |
* ``version_parts -> minor``: The result of :func:`distro.minor_version`. | |
* ``version_parts -> build_number``: The result of | |
:func:`distro.build_number`. | |
* ``like``: The result of :func:`distro.like`. | |
* ``codename``: The result of :func:`distro.codename`. | |
For a description of the *pretty* and *best* parameters, see the | |
:func:`distro.version` method. | |
""" | |
return _distro.info(pretty, best) | |
def os_release_info(): | |
""" | |
Return a dictionary containing key-value pairs for the information items | |
from the os-release file data source of the current Linux distribution. | |
See `os-release file`_ for details about these information items. | |
""" | |
return _distro.os_release_info() | |
def lsb_release_info(): | |
""" | |
Return a dictionary containing key-value pairs for the information items | |
from the lsb_release command data source of the current Linux distribution. | |
See `lsb_release command output`_ for details about these information | |
items. | |
""" | |
return _distro.lsb_release_info() | |
def distro_release_info(): | |
""" | |
Return a dictionary containing key-value pairs for the information items | |
from the distro release file data source of the current Linux distribution. | |
See `distro release file`_ for details about these information items. | |
""" | |
return _distro.distro_release_info() | |
def os_release_attr(attribute): | |
""" | |
Return a single named information item from the os-release file data source | |
of the current Linux distribution. | |
Parameters: | |
* ``attribute`` (string): Key of the information item. | |
Returns: | |
* (string): Value of the information item, if the item exists. | |
The empty string, if the item does not exist. | |
See `os-release file`_ for details about these information items. | |
""" | |
return _distro.os_release_attr(attribute) | |
def lsb_release_attr(attribute): | |
""" | |
Return a single named information item from the lsb_release command output | |
data source of the current Linux distribution. | |
Parameters: | |
* ``attribute`` (string): Key of the information item. | |
Returns: | |
* (string): Value of the information item, if the item exists. | |
The empty string, if the item does not exist. | |
See `lsb_release command output`_ for details about these information | |
items. | |
""" | |
return _distro.lsb_release_attr(attribute) | |
def distro_release_attr(attribute): | |
""" | |
Return a single named information item from the distro release file | |
data source of the current Linux distribution. | |
Parameters: | |
* ``attribute`` (string): Key of the information item. | |
Returns: | |
* (string): Value of the information item, if the item exists. | |
The empty string, if the item does not exist. | |
See `distro release file`_ for details about these information items. | |
""" | |
return _distro.distro_release_attr(attribute) | |
class LinuxDistribution(object): | |
""" | |
Provides information about a Linux distribution. | |
This package creates a private module-global instance of this class with | |
default initialization arguments, that is used by the | |
`consolidated accessor functions`_ and `single source accessor functions`_. | |
By using default initialization arguments, that module-global instance | |
returns data about the current Linux distribution (i.e. the distro this | |
package runs on). | |
Normally, it is not necessary to create additional instances of this class. | |
However, in situations where control is needed over the exact data sources | |
that are used, instances of this class can be created with a specific | |
distro release file, or a specific os-release file, or without invoking the | |
lsb_release command. | |
""" | |
def __init__(self, | |
include_lsb=True, | |
os_release_file='', | |
distro_release_file=''): | |
""" | |
The initialization method of this class gathers information from the | |
available data sources, and stores that in private instance attributes. | |
Subsequent access to the information items uses these private instance | |
attributes, so that the data sources are read only once. | |
Parameters: | |
* ``include_lsb`` (bool): Controls whether the | |
`lsb_release command output`_ is included as a data source. | |
If the lsb_release command is not available in the program execution | |
path, the data source for the lsb_release command will be empty. | |
* ``os_release_file`` (string): The path name of the | |
`os-release file`_ that is to be used as a data source. | |
An empty string (the default) will cause the default path name to | |
be used (see `os-release file`_ for details). | |
If the specified or defaulted os-release file does not exist, the | |
data source for the os-release file will be empty. | |
* ``distro_release_file`` (string): The path name of the | |
`distro release file`_ that is to be used as a data source. | |
An empty string (the default) will cause a default search algorithm | |
to be used (see `distro release file`_ for details). | |
If the specified distro release file does not exist, or if no default | |
distro release file can be found, the data source for the distro | |
release file will be empty. | |
Public instance attributes: | |
* ``os_release_file`` (string): The path name of the | |
`os-release file`_ that is actually used as a data source. The | |
empty string if no os-release file is used as a data source. | |
* ``distro_release_file`` (string): The path name of the | |
`distro release file`_ that is actually used as a data source. The | |
empty string if no distro release file is used as a data source. | |
Raises: | |
* :py:exc:`IOError`: Some I/O issue with an os-release file or distro | |
release file. | |
* :py:exc:`subprocess.CalledProcessError`: The lsb_release command had | |
some issue (other than not being available in the program execution | |
path). | |
* :py:exc:`UnicodeError`: A data source has unexpected characters or | |
uses an unexpected encoding. | |
""" | |
self.os_release_file = os_release_file or \ | |
os.path.join(_UNIXCONFDIR, _OS_RELEASE_BASENAME) | |
self.distro_release_file = distro_release_file or '' # updated later | |
self._os_release_info = self._get_os_release_info() | |
self._lsb_release_info = self._get_lsb_release_info() \ | |
if include_lsb else {} | |
self._distro_release_info = self._get_distro_release_info() | |
def __repr__(self): | |
"""Return repr of all info | |
""" | |
return \ | |
"LinuxDistribution(" \ | |
"os_release_file={0!r}, " \ | |
"distro_release_file={1!r}, " \ | |
"_os_release_info={2!r}, " \ | |
"_lsb_release_info={3!r}, " \ | |
"_distro_release_info={4!r})".format( | |
self.os_release_file, | |
self.distro_release_file, | |
self._os_release_info, | |
self._lsb_release_info, | |
self._distro_release_info) | |
def linux_distribution(self, full_distribution_name=True): | |
""" | |
Return information about the Linux distribution that is compatible | |
with Python's :func:`platform.linux_distribution`, supporting a subset | |
of its parameters. | |
For details, see :func:`distro.linux_distribution`. | |
""" | |
return ( | |
self.name() if full_distribution_name else self.id(), | |
self.version(), | |
self.codename() | |
) | |
def id(self): | |
"""Return the distro ID of the Linux distribution, as a string. | |
For details, see :func:`distro.id`. | |
""" | |
def normalize(distro_id, table): | |
distro_id = distro_id.lower().replace(' ', '_') | |
return table.get(distro_id, distro_id) | |
distro_id = self.os_release_attr('id') | |
if distro_id: | |
return normalize(distro_id, NORMALIZED_OS_ID) | |
distro_id = self.lsb_release_attr('distributor_id') | |
if distro_id: | |
return normalize(distro_id, NORMALIZED_LSB_ID) | |
distro_id = self.distro_release_attr('id') | |
if distro_id: | |
return normalize(distro_id, NORMALIZED_DISTRO_ID) | |
return '' | |
def name(self, pretty=False): | |
""" | |
Return the name of the Linux distribution, as a string. | |
For details, see :func:`distro.name`. | |
""" | |
name = self.os_release_attr('name') \ | |
or self.lsb_release_attr('distributor_id') \ | |
or self.distro_release_attr('name') | |
if pretty: | |
name = self.os_release_attr('pretty_name') \ | |
or self.lsb_release_attr('description') | |
if not name: | |
name = self.distro_release_attr('name') | |
version = self.version(pretty=True) | |
if version: | |
name = name + ' ' + version | |
return name or '' | |
def version(self, pretty=False, best=False): | |
""" | |
Return the version of the Linux distribution, as a string. | |
For details, see :func:`distro.version`. | |
""" | |
versions = [ | |
self.os_release_attr('version_id'), | |
self.lsb_release_attr('release'), | |
self.distro_release_attr('version_id'), | |
self._parse_distro_release_content( | |
self.os_release_attr('pretty_name')).get('version_id', ''), | |
self._parse_distro_release_content( | |
self.lsb_release_attr('description')).get('version_id', '') | |
] | |
version = '' | |
if best: | |
# This algorithm uses the last version in priority order that has | |
# the best precision. If the versions are not in conflict, that | |
# does not matter; otherwise, using the last one instead of the | |
# first one might be considered a surprise. | |
for v in versions: | |
if v.count(".") > version.count(".") or version == '': | |
version = v | |
else: | |
for v in versions: | |
if v != '': | |
version = v | |
break | |
if pretty and version and self.codename(): | |
version = u'{0} ({1})'.format(version, self.codename()) | |
return version | |
def version_parts(self, best=False): | |
""" | |
Return the version of the Linux distribution, as a tuple of version | |
numbers. | |
For details, see :func:`distro.version_parts`. | |
""" | |
version_str = self.version(best=best) | |
if version_str: | |
version_regex = re.compile(r'(\d+)\.?(\d+)?\.?(\d+)?') | |
matches = version_regex.match(version_str) | |
if matches: | |
major, minor, build_number = matches.groups() | |
return major, minor or '', build_number or '' | |
return '', '', '' | |
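# Worked example of the splitting regex used above (assumed inputs):
#
#     re.compile(r'(\d+)\.?(\d+)?\.?(\d+)?').match('7.1.1503').groups()
#     # -> ('7', '1', '1503')
#     re.compile(r'(\d+)\.?(\d+)?\.?(\d+)?').match('16.04').groups()
#     # -> ('16', '04', None)   (None becomes '' before being returned)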
def major_version(self, best=False): | |
""" | |
Return the major version number of the current distribution. | |
For details, see :func:`distro.major_version`. | |
""" | |
return self.version_parts(best)[0] | |
def minor_version(self, best=False): | |
""" | |
Return the minor version number of the Linux distribution. | |
For details, see :func:`distro.minor_version`. | |
""" | |
return self.version_parts(best)[1] | |
def build_number(self, best=False): | |
""" | |
Return the build number of the Linux distribution. | |
For details, see :func:`distro.build_number`. | |
""" | |
return self.version_parts(best)[2] | |
def like(self): | |
""" | |
Return the IDs of distributions that are like the Linux distribution. | |
For details, see :func:`distro.like`. | |
""" | |
return self.os_release_attr('id_like') or '' | |
def codename(self): | |
""" | |
Return the codename of the Linux distribution. | |
For details, see :func:`distro.codename`. | |
""" | |
return self.os_release_attr('codename') \ | |
or self.lsb_release_attr('codename') \ | |
or self.distro_release_attr('codename') \ | |
or '' | |
def info(self, pretty=False, best=False): | |
""" | |
Return certain machine-readable information about the Linux | |
distribution. | |
For details, see :func:`distro.info`. | |
""" | |
return dict( | |
id=self.id(), | |
version=self.version(pretty, best), | |
version_parts=dict( | |
major=self.major_version(best), | |
minor=self.minor_version(best), | |
build_number=self.build_number(best) | |
), | |
like=self.like(), | |
codename=self.codename(), | |
) | |
def os_release_info(self): | |
""" | |
Return a dictionary containing key-value pairs for the information | |
items from the os-release file data source of the Linux distribution. | |
For details, see :func:`distro.os_release_info`. | |
""" | |
return self._os_release_info | |
def lsb_release_info(self): | |
""" | |
Return a dictionary containing key-value pairs for the information | |
items from the lsb_release command data source of the Linux | |
distribution. | |
For details, see :func:`distro.lsb_release_info`. | |
""" | |
return self._lsb_release_info | |
def distro_release_info(self): | |
""" | |
Return a dictionary containing key-value pairs for the information | |
items from the distro release file data source of the Linux | |
distribution. | |
For details, see :func:`distro.distro_release_info`. | |
""" | |
return self._distro_release_info | |
def os_release_attr(self, attribute): | |
""" | |
Return a single named information item from the os-release file data | |
source of the Linux distribution. | |
For details, see :func:`distro.os_release_attr`. | |
""" | |
return self._os_release_info.get(attribute, '') | |
def lsb_release_attr(self, attribute): | |
""" | |
Return a single named information item from the lsb_release command | |
output data source of the Linux distribution. | |
For details, see :func:`distro.lsb_release_attr`. | |
""" | |
return self._lsb_release_info.get(attribute, '') | |
def distro_release_attr(self, attribute): | |
""" | |
Return a single named information item from the distro release file | |
data source of the Linux distribution. | |
For details, see :func:`distro.distro_release_attr`. | |
""" | |
return self._distro_release_info.get(attribute, '') | |
def _get_os_release_info(self): | |
""" | |
Get the information items from the specified os-release file. | |
Returns: | |
A dictionary containing all information items. | |
""" | |
if os.path.isfile(self.os_release_file): | |
with open(self.os_release_file) as release_file: | |
return self._parse_os_release_content(release_file) | |
return {} | |
@staticmethod | |
def _parse_os_release_content(lines): | |
""" | |
Parse the lines of an os-release file. | |
Parameters: | |
* lines: Iterable through the lines in the os-release file. | |
Each line must be a unicode string or a UTF-8 encoded byte | |
string. | |
Returns: | |
A dictionary containing all information items. | |
""" | |
props = {} | |
lexer = shlex.shlex(lines, posix=True) | |
lexer.whitespace_split = True | |
# The shlex module defines its `wordchars` variable using literals, | |
# making it dependent on the encoding of the Python source file. | |
# In Python 2.6 and 2.7, the shlex source file is encoded in | |
# 'iso-8859-1', and the `wordchars` variable is defined as a byte | |
# string. This causes a UnicodeDecodeError to be raised when the | |
# parsed content is a unicode object. The following fix resolves that | |
# (... but it should be fixed in shlex...): | |
if sys.version_info[0] == 2 and isinstance(lexer.wordchars, bytes): | |
lexer.wordchars = lexer.wordchars.decode('iso-8859-1') | |
tokens = list(lexer) | |
for token in tokens: | |
# At this point, all shell-like parsing has been done (i.e. | |
# comments processed, quotes and backslash escape sequences | |
# processed, multi-line values assembled, trailing newlines | |
# stripped, etc.), so the tokens are now either: | |
# * variable assignments: var=value | |
# * commands or their arguments (not allowed in os-release) | |
if '=' in token: | |
k, v = token.split('=', 1) | |
if isinstance(v, bytes): | |
v = v.decode('utf-8') | |
props[k.lower()] = v | |
if k == 'VERSION': | |
# this handles cases in which the codename is in | |
# the `(CODENAME)` (rhel, centos, fedora) format | |
# or in the `, CODENAME` format (Ubuntu). | |
codename = re.search(r'(\(\D+\))|,(\s+)?\D+', v) | |
if codename: | |
codename = codename.group() | |
codename = codename.strip('()') | |
codename = codename.strip(',') | |
codename = codename.strip() | |
# codename appears within parentheses. | |
props['codename'] = codename | |
else: | |
props['codename'] = '' | |
else: | |
# Ignore any tokens that are not variable assignments | |
pass | |
return props | |
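# Example of the os-release parsing above (a hypothetical two-line file,
# passed as a string for brevity; normally a file object is used):
#
#     content = 'NAME="Ubuntu"\nVERSION="16.04 LTS (Xenial Xerus)"\n'
#     LinuxDistribution._parse_os_release_content(content)
#     # -> {'name': 'Ubuntu',
#     #     'version': '16.04 LTS (Xenial Xerus)',
#     #     'codename': 'Xenial Xerus'}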
def _get_lsb_release_info(self): | |
""" | |
Get the information items from the lsb_release command output. | |
Returns: | |
A dictionary containing all information items. | |
""" | |
cmd = 'lsb_release -a' | |
process = subprocess.Popen( | |
cmd, | |
shell=True, | |
stdout=subprocess.PIPE, | |
stderr=subprocess.PIPE) | |
stdout, stderr = process.communicate() | |
stdout, stderr = stdout.decode('utf-8'), stderr.decode('utf-8') | |
code = process.returncode | |
if code == 0: | |
content = stdout.splitlines() | |
return self._parse_lsb_release_content(content) | |
elif code == 127: # Command not found | |
return {} | |
else: | |
if sys.version_info[:2] >= (3, 5): | |
raise subprocess.CalledProcessError(code, cmd, stdout, stderr) | |
elif sys.version_info[:2] >= (2, 7): | |
raise subprocess.CalledProcessError(code, cmd, stdout) | |
elif sys.version_info[:2] == (2, 6): | |
raise subprocess.CalledProcessError(code, cmd) | |
@staticmethod | |
def _parse_lsb_release_content(lines): | |
""" | |
Parse the output of the lsb_release command. | |
Parameters: | |
* lines: Iterable through the lines of the lsb_release output. | |
Each line must be a unicode string or a UTF-8 encoded byte | |
string. | |
Returns: | |
A dictionary containing all information items. | |
""" | |
props = {} | |
for line in lines: | |
line = line.decode('utf-8') if isinstance(line, bytes) else line | |
kv = line.strip('\n').split(':', 1) | |
if len(kv) != 2: | |
# Ignore lines without colon. | |
continue | |
k, v = kv | |
props.update({k.replace(' ', '_').lower(): v.strip()}) | |
return props | |
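# Example of the lsb_release parsing above (hypothetical command output;
# lines without a colon are skipped):
#
#     lines = ['Distributor ID:\tUbuntu\n', 'Release:\t16.04\n', 'no colon\n']
#     LinuxDistribution._parse_lsb_release_content(lines)
#     # -> {'distributor_id': 'Ubuntu', 'release': '16.04'}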
def _get_distro_release_info(self): | |
""" | |
Get the information items from the specified distro release file. | |
Returns: | |
A dictionary containing all information items. | |
""" | |
if self.distro_release_file: | |
# If it was specified, we use it and parse what we can, even if | |
# its file name or content does not match the expected pattern. | |
distro_info = self._parse_distro_release_file( | |
self.distro_release_file) | |
basename = os.path.basename(self.distro_release_file) | |
# The file name pattern for user-specified distro release files | |
# is somewhat more tolerant (compared to when searching for the | |
# file), because we want to use what was specified as best as | |
# possible. | |
match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename) | |
if match: | |
distro_info['id'] = match.group(1) | |
return distro_info | |
else: | |
basenames = os.listdir(_UNIXCONFDIR) | |
# We sort for repeatability in cases where there are multiple | |
# distro specific files; e.g. CentOS, Oracle, Enterprise all | |
# containing `redhat-release` on top of their own. | |
basenames.sort() | |
for basename in basenames: | |
if basename in _DISTRO_RELEASE_IGNORE_BASENAMES: | |
continue | |
match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename) | |
if match: | |
filepath = os.path.join(_UNIXCONFDIR, basename) | |
distro_info = self._parse_distro_release_file(filepath) | |
if 'name' in distro_info: | |
# The name is always present if the pattern matches | |
self.distro_release_file = filepath | |
distro_info['id'] = match.group(1) | |
return distro_info | |
return {} | |
def _parse_distro_release_file(self, filepath): | |
""" | |
Parse a distro release file. | |
Parameters: | |
* filepath: Path name of the distro release file. | |
Returns: | |
A dictionary containing all information items. | |
""" | |
if os.path.isfile(filepath): | |
with open(filepath) as fp: | |
# Only parse the first line. For instance, on SLES there | |
# are multiple lines. We don't want them... | |
return self._parse_distro_release_content(fp.readline()) | |
return {} | |
@staticmethod | |
def _parse_distro_release_content(line): | |
""" | |
Parse a line from a distro release file. | |
Parameters: | |
* line: Line from the distro release file. Must be a unicode string | |
or a UTF-8 encoded byte string. | |
Returns: | |
A dictionary containing all information items. | |
""" | |
if isinstance(line, bytes): | |
line = line.decode('utf-8') | |
matches = _DISTRO_RELEASE_CONTENT_REVERSED_PATTERN.match( | |
line.strip()[::-1]) | |
distro_info = {} | |
if matches: | |
# regexp ensures non-None | |
distro_info['name'] = matches.group(3)[::-1] | |
if matches.group(2): | |
distro_info['version_id'] = matches.group(2)[::-1] | |
if matches.group(1): | |
distro_info['codename'] = matches.group(1)[::-1] | |
elif line: | |
distro_info['name'] = line.strip() | |
return distro_info | |
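# Worked example of the reversed-pattern parsing above; the line is reversed
# so the trailing "release <version> (<codename>)" portion can be matched
# first (hypothetical input):
#
#     line = 'CentOS Linux release 7.1.1503 (Core)'
#     LinuxDistribution._parse_distro_release_content(line)
#     # -> {'name': 'CentOS Linux', 'version_id': '7.1.1503', 'codename': 'Core'}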
_distro = LinuxDistribution() | |
def main(): | |
import argparse | |
logger = logging.getLogger(__name__) | |
logger.setLevel(logging.DEBUG) | |
logger.addHandler(logging.StreamHandler(sys.stdout)) | |
parser = argparse.ArgumentParser(description="Linux distro info tool") | |
parser.add_argument( | |
'--json', | |
'-j', | |
help="Output in machine readable format", | |
action="store_true") | |
args = parser.parse_args() | |
if args.json: | |
logger.info(json.dumps(info(), indent=4, sort_keys=True)) | |
else: | |
logger.info('Name: %s', name(pretty=True)) | |
distribution_version = version(pretty=True) | |
if distribution_version: | |
logger.info('Version: %s', distribution_version) | |
distribution_codename = codename() | |
if distribution_codename: | |
logger.info('Codename: %s', distribution_codename) | |
if __name__ == '__main__': | |
main() |
""" | |
HTML parsing library based on the WHATWG "HTML5" | |
specification. The parser is designed to be compatible with existing | |
HTML found in the wild and implements well-defined error recovery that | |
is largely compatible with modern desktop web browsers. | |
Example usage: | |
import html5lib | |
f = open("my_document.html") | |
tree = html5lib.parse(f) | |
""" | |
from __future__ import absolute_import, division, unicode_literals | |
from .html5parser import HTMLParser, parse, parseFragment | |
from .treebuilders import getTreeBuilder | |
from .treewalkers import getTreeWalker | |
from .serializer import serialize | |
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", | |
"getTreeWalker", "serialize"] | |
# this has to be at the top level, see how setup.py parses this | |
__version__ = "1.0b10" |
from __future__ import absolute_import, division, unicode_literals | |
import re | |
import warnings | |
from .constants import DataLossWarning | |
baseChar = """ | |
[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | | |
[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | | |
[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | | |
[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | | |
[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | | |
[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | | |
[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | | |
[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | | |
[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | | |
[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | | |
[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | | |
[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | | |
[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | | |
[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | | |
[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | | |
[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | | |
[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | | |
[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | | |
[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | | |
[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | | |
[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | | |
[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | | |
[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | | |
[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | | |
[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | | |
[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | | |
[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | | |
[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | | |
[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | | |
[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | | |
#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | | |
#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | | |
#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | | |
[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | | |
[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | | |
#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | | |
[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | | |
[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | | |
[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | | |
[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | | |
[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | | |
#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | | |
[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | | |
[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | | |
[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | | |
[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" | |
ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]""" | |
combiningCharacter = """ | |
[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | | |
[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | | |
[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | | |
[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | | |
#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | | |
[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | | |
[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | | |
#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | | |
[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | | |
[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | | |
#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | | |
[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | | |
[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | | |
[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | | |
[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | | |
[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | | |
#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | | |
[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | | |
#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | | |
[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | | |
[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | | |
#x3099 | #x309A""" | |
digit = """ | |
[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | | |
[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | | |
[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | | |
[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" | |
extender = """ | |
#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | | |
[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]""" | |
letter = " | ".join([baseChar, ideographic]) | |
# Without the ":" that the XML spec's NameChar production also allows | |
name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter, | |
extender]) | |
nameFirst = " | ".join([letter, "_"]) | |
reChar = re.compile(r"#x([\dA-F]{4})") | |
reCharRange = re.compile(r"\[#x([\dA-F]{4})-#x([\dA-F]{4})\]") | |
def charStringToList(chars): | |
charRanges = [item.strip() for item in chars.split(" | ")] | |
rv = [] | |
for item in charRanges: | |
foundMatch = False | |
for regexp in (reChar, reCharRange): | |
match = regexp.match(item) | |
if match is not None: | |
rv.append([hexToInt(item) for item in match.groups()]) | |
if len(rv[-1]) == 1: | |
rv[-1] = rv[-1] * 2 | |
foundMatch = True | |
break | |
if not foundMatch: | |
assert len(item) == 1 | |
rv.append([ord(item)] * 2) | |
rv = normaliseCharList(rv) | |
return rv | |
def normaliseCharList(charList): | |
charList = sorted(charList) | |
for item in charList: | |
assert item[1] >= item[0] | |
rv = [] | |
i = 0 | |
while i < len(charList): | |
j = 1 | |
rv.append(charList[i]) | |
while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1: | |
rv[-1][1] = charList[i + j][1] | |
j += 1 | |
i += j | |
return rv | |
# We don't really support characters above the BMP :( | |
max_unicode = int("FFFF", 16) | |
def missingRanges(charList): | |
rv = [] | |
if charList[0][0] != 0: | |
rv.append([0, charList[0][0] - 1]) | |
for i, item in enumerate(charList[:-1]): | |
rv.append([item[1] + 1, charList[i + 1][0] - 1]) | |
if charList[-1][1] != max_unicode: | |
rv.append([charList[-1][1] + 1, max_unicode]) | |
return rv | |
def listToRegexpStr(charList): | |
rv = [] | |
for item in charList: | |
if item[0] == item[1]: | |
rv.append(escapeRegexp(chr(item[0]))) | |
else: | |
rv.append(escapeRegexp(chr(item[0])) + "-" + | |
escapeRegexp(chr(item[1]))) | |
return "[%s]" % "".join(rv) | |
def hexToInt(hex_str): | |
return int(hex_str, 16) | |
def escapeRegexp(string): | |
specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}", | |
"[", "]", "|", "(", ")", "-") | |
for char in specialCharacters: | |
string = string.replace(char, "\\" + char) | |
return string | |
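# Regeneration sketch (not executed at import time): the helpers above turn
# the spec character classes into the precompiled "output" patterns below.
# The variable names here are illustrative only:
#
#     nameCharRanges = charStringToList(name)        # allowed NameChar ranges
#     nonNameRanges = missingRanges(nameCharRanges)   # complement within the BMP
#     # listToRegexpStr(nonNameRanges) yields the character class compiled
#     # into nonXmlNameBMPRegexp below; nameFirst feeds the "First" variant.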
# output from the above | |
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa | |
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa | |
# Simpler things | |
nonPubidCharRegexp = re.compile(r"[^\x20\x0D\x0Aa-zA-Z0-9\-'()+,./:=?;!*#@$_%]")  # raw string avoids invalid-escape warnings | |
class InfosetFilter(object): | |
replacementRegexp = re.compile(r"U[\dA-F]{5,5}") | |
def __init__(self, | |
dropXmlnsLocalName=False, | |
dropXmlnsAttrNs=False, | |
preventDoubleDashComments=False, | |
preventDashAtCommentEnd=False, | |
replaceFormFeedCharacters=True, | |
preventSingleQuotePubid=False): | |
self.dropXmlnsLocalName = dropXmlnsLocalName | |
self.dropXmlnsAttrNs = dropXmlnsAttrNs | |
self.preventDoubleDashComments = preventDoubleDashComments | |
self.preventDashAtCommentEnd = preventDashAtCommentEnd | |
self.replaceFormFeedCharacters = replaceFormFeedCharacters | |
self.preventSingleQuotePubid = preventSingleQuotePubid | |
self.replaceCache = {} | |
def coerceAttribute(self, name, namespace=None): | |
if self.dropXmlnsLocalName and name.startswith("xmlns:"): | |
warnings.warn("Attributes cannot begin with xmlns", DataLossWarning) | |
return None | |
elif (self.dropXmlnsAttrNs and | |
namespace == "http://www.w3.org/2000/xmlns/"): | |
warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning) | |
return None | |
else: | |
return self.toXmlName(name) | |
def coerceElement(self, name): | |
return self.toXmlName(name) | |
def coerceComment(self, data): | |
if self.preventDoubleDashComments: | |
while "--" in data: | |
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) | |
data = data.replace("--", "- -") | |
if data.endswith("-"): | |
warnings.warn("Comments cannot end in a dash", DataLossWarning) | |
data += " " | |
return data | |
def coerceCharacters(self, data): | |
if self.replaceFormFeedCharacters: | |
for _ in range(data.count("\x0C")): | |
warnings.warn("Text cannot contain U+000C", DataLossWarning) | |
data = data.replace("\x0C", " ") | |
# Other non-xml characters | |
return data | |
def coercePubid(self, data): | |
dataOutput = data | |
for char in nonPubidCharRegexp.findall(data): | |
warnings.warn("Coercing non-XML pubid", DataLossWarning) | |
replacement = self.getReplacementCharacter(char) | |
dataOutput = dataOutput.replace(char, replacement) | |
if self.preventSingleQuotePubid and dataOutput.find("'") >= 0: | |
warnings.warn("Pubid cannot contain single quote", DataLossWarning) | |
dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'")) | |
return dataOutput | |
def toXmlName(self, name): | |
nameFirst = name[0] | |
nameRest = name[1:] | |
m = nonXmlNameFirstBMPRegexp.match(nameFirst) | |
if m: | |
warnings.warn("Coercing non-XML name", DataLossWarning) | |
nameFirstOutput = self.getReplacementCharacter(nameFirst) | |
else: | |
nameFirstOutput = nameFirst | |
nameRestOutput = nameRest | |
replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest)) | |
for char in replaceChars: | |
warnings.warn("Coercing non-XML name", DataLossWarning) | |
replacement = self.getReplacementCharacter(char) | |
nameRestOutput = nameRestOutput.replace(char, replacement) | |
return nameFirstOutput + nameRestOutput | |
def getReplacementCharacter(self, char): | |
if char in self.replaceCache: | |
replacement = self.replaceCache[char] | |
else: | |
replacement = self.escapeChar(char) | |
return replacement | |
def fromXmlName(self, name): | |
for item in set(self.replacementRegexp.findall(name)): | |
name = name.replace(item, self.unescapeChar(item)) | |
return name | |
def escapeChar(self, char): | |
replacement = "U%05X" % ord(char) | |
self.replaceCache[char] = replacement | |
return replacement | |
def unescapeChar(self, charcode): | |
return chr(int(charcode[1:], 16)) |
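# A minimal usage sketch for InfosetFilter (hypothetical input; the escape
# string is derived from the offending character's code point, and a
# DataLossWarning is emitted when a character is coerced):
#
#     f = InfosetFilter()
#     coerced = f.coerceElement("1bad-name")   # e.g. 'U00031bad-name'
#     f.fromXmlName(coerced)                   # round-trips to '1bad-name'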
from __future__ import absolute_import, division, unicode_literals | |
from pip._vendor.six import text_type, binary_type | |
from pip._vendor.six.moves import http_client, urllib | |
import codecs | |
import re | |
from pip._vendor import webencodings | |
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase | |
from .constants import ReparseException | |
from . import _utils | |
from io import StringIO | |
try: | |
from io import BytesIO | |
except ImportError: | |
BytesIO = StringIO | |
# Non-unicode versions of constants for use in the pre-parser | |
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) | |
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) | |
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) | |
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) | |
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa | |
if _utils.supports_lone_surrogates: | |
# Use one extra step of indirection and create surrogates with | |
# eval. Not using this indirection would introduce an illegal | |
# unicode literal on platforms not supporting such lone | |
# surrogates. | |
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 | |
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + | |
eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used | |
"]") | |
else: | |
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) | |
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, | |
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, | |
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, | |
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, | |
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, | |
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, | |
0x10FFFE, 0x10FFFF]) | |
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]") | |
# Cache for charsUntil() | |
charsUntilRegEx = {} | |
class BufferedStream(object): | |
"""Buffering for streams that do not have buffering of their own | |
The buffer is implemented as a list of chunks on the assumption that | |
joining many strings will be slow since it is O(n**2) | |
""" | |
def __init__(self, stream): | |
self.stream = stream | |
self.buffer = [] | |
self.position = [-1, 0] # chunk number, offset | |
def tell(self): | |
pos = 0 | |
for chunk in self.buffer[:self.position[0]]: | |
pos += len(chunk) | |
pos += self.position[1] | |
return pos | |
def seek(self, pos): | |
assert pos <= self._bufferedBytes() | |
offset = pos | |
i = 0 | |
while len(self.buffer[i]) < offset: | |
offset -= len(self.buffer[i]) | |
i += 1 | |
self.position = [i, offset] | |
def read(self, bytes): | |
if not self.buffer: | |
return self._readStream(bytes) | |
elif (self.position[0] == len(self.buffer) and | |
self.position[1] == len(self.buffer[-1])): | |
return self._readStream(bytes) | |
else: | |
return self._readFromBuffer(bytes) | |
def _bufferedBytes(self): | |
return sum([len(item) for item in self.buffer]) | |
def _readStream(self, bytes): | |
data = self.stream.read(bytes) | |
self.buffer.append(data) | |
self.position[0] += 1 | |
self.position[1] = len(data) | |
return data | |
def _readFromBuffer(self, bytes): | |
remainingBytes = bytes | |
rv = [] | |
bufferIndex = self.position[0] | |
bufferOffset = self.position[1] | |
while bufferIndex < len(self.buffer) and remainingBytes != 0: | |
assert remainingBytes > 0 | |
bufferedData = self.buffer[bufferIndex] | |
if remainingBytes <= len(bufferedData) - bufferOffset: | |
bytesToRead = remainingBytes | |
self.position = [bufferIndex, bufferOffset + bytesToRead] | |
else: | |
bytesToRead = len(bufferedData) - bufferOffset | |
self.position = [bufferIndex, len(bufferedData)] | |
bufferIndex += 1 | |
rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead]) | |
remainingBytes -= bytesToRead | |
bufferOffset = 0 | |
if remainingBytes: | |
rv.append(self._readStream(remainingBytes)) | |
return b"".join(rv) | |
def HTMLInputStream(source, **kwargs): | |
# Work around Python bug #20007: read(0) closes the connection. | |
# http://bugs.python.org/issue20007 | |
if (isinstance(source, http_client.HTTPResponse) or | |
# Also check for addinfourl wrapping HTTPResponse | |
(isinstance(source, urllib.response.addbase) and | |
isinstance(source.fp, http_client.HTTPResponse))): | |
isUnicode = False | |
elif hasattr(source, "read"): | |
isUnicode = isinstance(source.read(0), text_type) | |
else: | |
isUnicode = isinstance(source, text_type) | |
if isUnicode: | |
encodings = [x for x in kwargs if x.endswith("_encoding")] | |
if encodings: | |
raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings) | |
return HTMLUnicodeInputStream(source, **kwargs) | |
else: | |
return HTMLBinaryInputStream(source, **kwargs) | |
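# Dispatch sketch for the factory above (assumed inputs for illustration):
#
#     HTMLInputStream("<p>text</p>")    # text input  -> HTMLUnicodeInputStream
#     HTMLInputStream(b"<p>bytes</p>")  # byte input  -> HTMLBinaryInputStream
#     # Encoding hints (keyword arguments ending in "_encoding") are only
#     # accepted for byte input; combining them with text raises TypeError.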
class HTMLUnicodeInputStream(object): | |
"""Provides a unicode stream of characters to the HTMLTokenizer. | |
This class takes care of character encoding and removing or replacing | |
incorrect byte-sequences and also provides column and line tracking. | |
""" | |
_defaultChunkSize = 10240 | |
def __init__(self, source): | |
"""Initialises the HTMLInputStream. | |
HTMLInputStream(source, [encoding]) -> Normalized stream from source | |
for use by html5lib. | |
source can be either a file-object, local filename or a string. | |
The optional encoding parameter must be a string that indicates | |
the encoding. If specified, that encoding will be used, | |
regardless of any BOM or later declaration (such as in a meta | |
element) | |
""" | |
if not _utils.supports_lone_surrogates: | |
# Such platforms will have already checked for such | |
# surrogate errors, so no need to do this checking. | |
self.reportCharacterErrors = None | |
elif len("\U0010FFFF") == 1: | |
self.reportCharacterErrors = self.characterErrorsUCS4 | |
else: | |
self.reportCharacterErrors = self.characterErrorsUCS2 | |
# List of where new lines occur | |
self.newLines = [0] | |
self.charEncoding = (lookupEncoding("utf-8"), "certain") | |
self.dataStream = self.openStream(source) | |
self.reset() | |
def reset(self): | |
self.chunk = "" | |
self.chunkSize = 0 | |
self.chunkOffset = 0 | |
self.errors = [] | |
# number of (complete) lines in previous chunks | |
self.prevNumLines = 0 | |
# number of columns in the last line of the previous chunk | |
self.prevNumCols = 0 | |
# Deal with CR LF and surrogates split over chunk boundaries | |
self._bufferedCharacter = None | |
def openStream(self, source): | |
"""Produces a file object from source. | |
source can be either a file object, local filename or a string. | |
""" | |
# Already a file object | |
if hasattr(source, 'read'): | |
stream = source | |
else: | |
stream = StringIO(source) | |
return stream | |
def _position(self, offset): | |
chunk = self.chunk | |
nLines = chunk.count('\n', 0, offset) | |
positionLine = self.prevNumLines + nLines | |
lastLinePos = chunk.rfind('\n', 0, offset) | |
if lastLinePos == -1: | |
positionColumn = self.prevNumCols + offset | |
else: | |
positionColumn = offset - (lastLinePos + 1) | |
return (positionLine, positionColumn) | |
def position(self): | |
"""Returns (line, col) of the current position in the stream.""" | |
line, col = self._position(self.chunkOffset) | |
return (line + 1, col) | |
def char(self): | |
""" Read one character from the stream or queue if available. Return | |
EOF when EOF is reached. | |
""" | |
# Read a new chunk from the input stream if necessary | |
if self.chunkOffset >= self.chunkSize: | |
if not self.readChunk(): | |
return EOF | |
chunkOffset = self.chunkOffset | |
char = self.chunk[chunkOffset] | |
self.chunkOffset = chunkOffset + 1 | |
return char | |
def readChunk(self, chunkSize=None): | |
if chunkSize is None: | |
chunkSize = self._defaultChunkSize | |
self.prevNumLines, self.prevNumCols = self._position(self.chunkSize) | |
self.chunk = "" | |
self.chunkSize = 0 | |
self.chunkOffset = 0 | |
data = self.dataStream.read(chunkSize) | |
# Deal with CR LF and surrogates broken across chunks | |
if self._bufferedCharacter: | |
data = self._bufferedCharacter + data | |
self._bufferedCharacter = None | |
elif not data: | |
# We have no more data, bye-bye stream | |
return False | |
if len(data) > 1: | |
lastv = ord(data[-1]) | |
if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF: | |
self._bufferedCharacter = data[-1] | |
data = data[:-1] | |
if self.reportCharacterErrors: | |
self.reportCharacterErrors(data) | |
# Replace invalid characters | |
data = data.replace("\r\n", "\n") | |
data = data.replace("\r", "\n") | |
self.chunk = data | |
self.chunkSize = len(data) | |
return True | |
def characterErrorsUCS4(self, data): | |
for _ in range(len(invalid_unicode_re.findall(data))): | |
self.errors.append("invalid-codepoint") | |
def characterErrorsUCS2(self, data): | |
# Someone picked the wrong compile option | |
# You lose | |
skip = False | |
for match in invalid_unicode_re.finditer(data): | |
if skip: | |
continue | |
codepoint = ord(match.group()) | |
pos = match.start() | |
# Pretty sure there should be endianness issues here | |
if _utils.isSurrogatePair(data[pos:pos + 2]): | |
# We have a surrogate pair! | |
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2]) | |
if char_val in non_bmp_invalid_codepoints: | |
self.errors.append("invalid-codepoint") | |
skip = True | |
elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and | |
pos == len(data) - 1): | |
self.errors.append("invalid-codepoint") | |
else: | |
skip = False | |
self.errors.append("invalid-codepoint") | |
def charsUntil(self, characters, opposite=False): | |
""" Returns a string of characters from the stream up to but not | |
including any character in 'characters' or EOF. 'characters' must be | |
a container that supports the 'in' method and iteration over its | |
characters. | |
""" | |
# Use a cache of regexps to find the required characters | |
try: | |
chars = charsUntilRegEx[(characters, opposite)] | |
except KeyError: | |
if __debug__: | |
for c in characters: | |
assert(ord(c) < 128) | |
regex = "".join(["\\x%02x" % ord(c) for c in characters]) | |
if not opposite: | |
regex = "^%s" % regex | |
chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) | |
rv = [] | |
while True: | |
# Find the longest matching prefix | |
m = chars.match(self.chunk, self.chunkOffset) | |
if m is None: | |
# If nothing matched, and it wasn't because we ran out of chunk, | |
# then stop | |
if self.chunkOffset != self.chunkSize: | |
break | |
else: | |
end = m.end() | |
# If not the whole chunk matched, return everything | |
# up to the part that didn't match | |
if end != self.chunkSize: | |
rv.append(self.chunk[self.chunkOffset:end]) | |
self.chunkOffset = end | |
break | |
# If the whole remainder of the chunk matched, | |
# use it all and read the next chunk | |
rv.append(self.chunk[self.chunkOffset:]) | |
if not self.readChunk(): | |
# Reached EOF | |
break | |
r = "".join(rv) | |
return r | |
def unget(self, char): | |
# Only one character is allowed to be ungotten at once - it must | |
# be consumed again before any further call to unget | |
if char is not None: | |
if self.chunkOffset == 0: | |
# unget is called quite rarely, so it's a good idea to do | |
# more work here if it saves a bit of work in the frequently | |
# called char and charsUntil. | |
# So, just prepend the ungotten character onto the current | |
# chunk: | |
self.chunk = char + self.chunk | |
self.chunkSize += 1 | |
else: | |
self.chunkOffset -= 1 | |
assert self.chunk[self.chunkOffset] == char | |
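# Illustrative usage sketch (added; not part of the original source). It | |
# assumes this vendored module is importable as pip._vendor.html5lib._inputstream | |
# and shows the basic read/peek API defined above; the outputs are what the | |
# code above should produce, but they are untested here. | |
# >>> from pip._vendor.html5lib._inputstream import HTMLUnicodeInputStream | |
# >>> s = HTMLUnicodeInputStream("<p>hi") | |
# >>> s.char()                  # one character at a time | |
# '<' | |
# >>> s.charsUntil(">")         # everything up to, but not including, ">" | |
# 'p' | |
# >>> s.unget("p")              # exactly one character may be pushed back | |
# >>> s.position()              # (line, col); line is 1-based | |
# (1, 1) | |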
class HTMLBinaryInputStream(HTMLUnicodeInputStream): | |
"""Provides a unicode stream of characters to the HTMLTokenizer. | |
This class takes care of character encoding and removing or replacing | |
incorrect byte-sequences and also provides column and line tracking. | |
""" | |
def __init__(self, source, override_encoding=None, transport_encoding=None, | |
same_origin_parent_encoding=None, likely_encoding=None, | |
default_encoding="windows-1252", useChardet=True): | |
"""Initialises the HTMLInputStream. | |
HTMLInputStream(source, [encoding]) -> Normalized stream from source | |
for use by html5lib. | |
source can be either a file-object, local filename or a string. | |
The optional encoding parameter must be a string that indicates | |
the encoding. If specified, that encoding will be used, | |
regardless of any BOM or later declaration (such as in a meta | |
element) | |
""" | |
# Raw Stream - for unicode objects this will encode to utf-8 and set | |
# self.charEncoding as appropriate | |
self.rawStream = self.openStream(source) | |
HTMLUnicodeInputStream.__init__(self, self.rawStream) | |
# Encoding Information | |
# Number of bytes to use when looking for a meta element with | |
# encoding information | |
self.numBytesMeta = 1024 | |
# Number of bytes to use when detecting the encoding with chardet | |
self.numBytesChardet = 100 | |
# Things from args | |
self.override_encoding = override_encoding | |
self.transport_encoding = transport_encoding | |
self.same_origin_parent_encoding = same_origin_parent_encoding | |
self.likely_encoding = likely_encoding | |
self.default_encoding = default_encoding | |
# Determine encoding | |
self.charEncoding = self.determineEncoding(useChardet) | |
assert self.charEncoding[0] is not None | |
# Call superclass | |
self.reset() | |
def reset(self): | |
self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace') | |
HTMLUnicodeInputStream.reset(self) | |
def openStream(self, source): | |
"""Produces a file object from source. | |
source can be either a file object, local filename or a string. | |
""" | |
# Already a file object | |
if hasattr(source, 'read'): | |
stream = source | |
else: | |
stream = BytesIO(source) | |
try: | |
stream.seek(stream.tell()) | |
except: # pylint:disable=bare-except | |
stream = BufferedStream(stream) | |
return stream | |
def determineEncoding(self, chardet=True): | |
# BOMs take precedence over everything | |
# This will also read past the BOM if present | |
charEncoding = self.detectBOM(), "certain" | |
if charEncoding[0] is not None: | |
return charEncoding | |
# If we've been overridden, we've been overridden | |
charEncoding = lookupEncoding(self.override_encoding), "certain" | |
if charEncoding[0] is not None: | |
return charEncoding | |
# Now check the transport layer | |
charEncoding = lookupEncoding(self.transport_encoding), "certain" | |
if charEncoding[0] is not None: | |
return charEncoding | |
# Look for meta elements with encoding information | |
charEncoding = self.detectEncodingMeta(), "tentative" | |
if charEncoding[0] is not None: | |
return charEncoding | |
# Parent document encoding | |
charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative" | |
if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"): | |
return charEncoding | |
# "likely" encoding | |
charEncoding = lookupEncoding(self.likely_encoding), "tentative" | |
if charEncoding[0] is not None: | |
return charEncoding | |
# Guess with chardet, if available | |
if chardet: | |
try: | |
from chardet.universaldetector import UniversalDetector | |
except ImportError: | |
pass | |
else: | |
buffers = [] | |
detector = UniversalDetector() | |
while not detector.done: | |
buffer = self.rawStream.read(self.numBytesChardet) | |
assert isinstance(buffer, bytes) | |
if not buffer: | |
break | |
buffers.append(buffer) | |
detector.feed(buffer) | |
detector.close() | |
encoding = lookupEncoding(detector.result['encoding']) | |
self.rawStream.seek(0) | |
if encoding is not None: | |
return encoding, "tentative" | |
# Try the default encoding | |
charEncoding = lookupEncoding(self.default_encoding), "tentative" | |
if charEncoding[0] is not None: | |
return charEncoding | |
# Fallback to html5lib's default if even that hasn't worked | |
return lookupEncoding("windows-1252"), "tentative" | |
def changeEncoding(self, newEncoding): | |
assert self.charEncoding[1] != "certain" | |
newEncoding = lookupEncoding(newEncoding) | |
if newEncoding is None: | |
return | |
if newEncoding.name in ("utf-16be", "utf-16le"): | |
newEncoding = lookupEncoding("utf-8") | |
assert newEncoding is not None | |
elif newEncoding == self.charEncoding[0]: | |
self.charEncoding = (self.charEncoding[0], "certain") | |
else: | |
self.rawStream.seek(0) | |
self.charEncoding = (newEncoding, "certain") | |
self.reset() | |
raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) | |
def detectBOM(self): | |
"""Attempts to detect at BOM at the start of the stream. If | |
an encoding can be determined from the BOM return the name of the | |
encoding otherwise return None""" | |
bomDict = { | |
codecs.BOM_UTF8: 'utf-8', | |
codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be', | |
codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be' | |
} | |
# Go to beginning of file and read in 4 bytes | |
string = self.rawStream.read(4) | |
assert isinstance(string, bytes) | |
# Try detecting the BOM using bytes from the string | |
encoding = bomDict.get(string[:3]) # UTF-8 | |
seek = 3 | |
if not encoding: | |
# Need to detect UTF-32 before UTF-16 | |
encoding = bomDict.get(string) # UTF-32 | |
seek = 4 | |
if not encoding: | |
encoding = bomDict.get(string[:2]) # UTF-16 | |
seek = 2 | |
# Set the read position past the BOM if one was found, otherwise | |
# set it to the start of the stream | |
if encoding: | |
self.rawStream.seek(seek) | |
return lookupEncoding(encoding) | |
else: | |
self.rawStream.seek(0) | |
return None | |
def detectEncodingMeta(self): | |
"""Report the encoding declared by the meta element | |
""" | |
buffer = self.rawStream.read(self.numBytesMeta) | |
assert isinstance(buffer, bytes) | |
parser = EncodingParser(buffer) | |
self.rawStream.seek(0) | |
encoding = parser.getEncoding() | |
if encoding is not None and encoding.name in ("utf-16be", "utf-16le"): | |
encoding = lookupEncoding("utf-8") | |
return encoding | |
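# Illustrative sketch (added; not in the original source) of the encoding | |
# precedence implemented by determineEncoding above: BOM, then override, then | |
# transport, then <meta>, then same-origin parent, then "likely", then chardet, | |
# then the default. The exact names come from the vendored webencodings tables, | |
# and the outputs below are untested expectations, not recorded results. | |
# >>> from pip._vendor.html5lib._inputstream import HTMLBinaryInputStream | |
# >>> s = HTMLBinaryInputStream(b'<meta charset="shift_jis"><p>x') | |
# >>> s.charEncoding[0].name, s.charEncoding[1]    # found by detectEncodingMeta | |
# ('shift_jis', 'tentative') | |
# >>> s = HTMLBinaryInputStream(b'\xef\xbb\xbf<p>x') | |
# >>> s.charEncoding[0].name, s.charEncoding[1]    # a BOM wins and is "certain" | |
# ('utf-8', 'certain') | |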
class EncodingBytes(bytes): | |
"""String-like object with an associated position and various extra methods | |
If the position is ever greater than the string length then an exception is | |
raised""" | |
def __new__(self, value): | |
assert isinstance(value, bytes) | |
return bytes.__new__(self, value.lower()) | |
def __init__(self, value): | |
# pylint:disable=unused-argument | |
self._position = -1 | |
def __iter__(self): | |
return self | |
def __next__(self): | |
p = self._position = self._position + 1 | |
if p >= len(self): | |
raise StopIteration | |
elif p < 0: | |
raise TypeError | |
return self[p:p + 1] | |
def next(self): | |
# Py2 compat | |
return self.__next__() | |
def previous(self): | |
p = self._position | |
if p >= len(self): | |
raise StopIteration | |
elif p < 0: | |
raise TypeError | |
self._position = p = p - 1 | |
return self[p:p + 1] | |
def setPosition(self, position): | |
if self._position >= len(self): | |
raise StopIteration | |
self._position = position | |
def getPosition(self): | |
if self._position >= len(self): | |
raise StopIteration | |
if self._position >= 0: | |
return self._position | |
else: | |
return None | |
position = property(getPosition, setPosition) | |
def getCurrentByte(self): | |
return self[self.position:self.position + 1] | |
currentByte = property(getCurrentByte) | |
def skip(self, chars=spaceCharactersBytes): | |
"""Skip past a list of characters""" | |
p = self.position # use property for the error-checking | |
while p < len(self): | |
c = self[p:p + 1] | |
if c not in chars: | |
self._position = p | |
return c | |
p += 1 | |
self._position = p | |
return None | |
def skipUntil(self, chars): | |
p = self.position | |
while p < len(self): | |
c = self[p:p + 1] | |
if c in chars: | |
self._position = p | |
return c | |
p += 1 | |
self._position = p | |
return None | |
def matchBytes(self, bytes): | |
"""Look for a sequence of bytes at the start of a string. If the bytes | |
are found return True and advance the position to the byte after the | |
match. Otherwise return False and leave the position alone""" | |
p = self.position | |
data = self[p:p + len(bytes)] | |
rv = data.startswith(bytes) | |
if rv: | |
self.position += len(bytes) | |
return rv | |
def jumpTo(self, bytes): | |
"""Look for the next sequence of bytes matching a given sequence. If | |
a match is found advance the position to the last byte of the match""" | |
newPosition = self[self.position:].find(bytes) | |
if newPosition > -1: | |
# XXX: This is ugly, but I can't see a nicer way to fix this. | |
if self._position == -1: | |
self._position = 0 | |
self._position += (newPosition + len(bytes) - 1) | |
return True | |
else: | |
raise StopIteration | |
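# Illustrative sketch (added; not in the original source) of the EncodingBytes | |
# cursor semantics defined above: the stored value is lowercased by __new__ and | |
# the cursor starts before index 0. Outputs are expectations, not test results. | |
# >>> eb = EncodingBytes(b"<META x>")    # stored as b"<meta x>" | |
# >>> next(eb)                           # first step lands on index 0 | |
# b'<' | |
# >>> eb.matchBytes(b"<meta")            # True, cursor moves past the match | |
# True | |
# >>> eb.currentByte | |
# b' ' | |
# >>> eb.jumpTo(b">")                    # lands on the last byte of the match | |
# True | |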
class EncodingParser(object): | |
"""Mini parser for detecting character encoding from meta elements""" | |
def __init__(self, data): | |
"""string - the data to work on for encoding detection""" | |
self.data = EncodingBytes(data) | |
self.encoding = None | |
def getEncoding(self): | |
methodDispatch = ( | |
(b"<!--", self.handleComment), | |
(b"<meta", self.handleMeta), | |
(b"</", self.handlePossibleEndTag), | |
(b"<!", self.handleOther), | |
(b"<?", self.handleOther), | |
(b"<", self.handlePossibleStartTag)) | |
for _ in self.data: | |
keepParsing = True | |
for key, method in methodDispatch: | |
if self.data.matchBytes(key): | |
try: | |
keepParsing = method() | |
break | |
except StopIteration: | |
keepParsing = False | |
break | |
if not keepParsing: | |
break | |
return self.encoding | |
def handleComment(self): | |
"""Skip over comments""" | |
return self.data.jumpTo(b"-->") | |
def handleMeta(self): | |
if self.data.currentByte not in spaceCharactersBytes: | |
# <meta is not followed by a space, so just keep going | |
return True | |
# We have a valid meta element we want to search for attributes | |
hasPragma = False | |
pendingEncoding = None | |
while True: | |
# Try to find the next attribute after the current position | |
attr = self.getAttribute() | |
if attr is None: | |
return True | |
else: | |
if attr[0] == b"http-equiv": | |
hasPragma = attr[1] == b"content-type" | |
if hasPragma and pendingEncoding is not None: | |
self.encoding = pendingEncoding | |
return False | |
elif attr[0] == b"charset": | |
tentativeEncoding = attr[1] | |
codec = lookupEncoding(tentativeEncoding) | |
if codec is not None: | |
self.encoding = codec | |
return False | |
elif attr[0] == b"content": | |
contentParser = ContentAttrParser(EncodingBytes(attr[1])) | |
tentativeEncoding = contentParser.parse() | |
if tentativeEncoding is not None: | |
codec = lookupEncoding(tentativeEncoding) | |
if codec is not None: | |
if hasPragma: | |
self.encoding = codec | |
return False | |
else: | |
pendingEncoding = codec | |
def handlePossibleStartTag(self): | |
return self.handlePossibleTag(False) | |
def handlePossibleEndTag(self): | |
next(self.data) | |
return self.handlePossibleTag(True) | |
def handlePossibleTag(self, endTag): | |
data = self.data | |
if data.currentByte not in asciiLettersBytes: | |
# If the next byte is not an ascii letter either ignore this | |
# fragment (possible start tag case) or treat it according to | |
# handleOther | |
if endTag: | |
data.previous() | |
self.handleOther() | |
return True | |
c = data.skipUntil(spacesAngleBrackets) | |
if c == b"<": | |
# return to the first step in the overall "two step" algorithm | |
# reprocessing the < byte | |
data.previous() | |
else: | |
# Read all attributes | |
attr = self.getAttribute() | |
while attr is not None: | |
attr = self.getAttribute() | |
return True | |
def handleOther(self): | |
return self.data.jumpTo(b">") | |
def getAttribute(self): | |
"""Return a name,value pair for the next attribute in the stream, | |
if one is found, or None""" | |
data = self.data | |
# Step 1 (skip chars) | |
c = data.skip(spaceCharactersBytes | frozenset([b"/"])) | |
assert c is None or len(c) == 1 | |
# Step 2 | |
if c in (b">", None): | |
return None | |
# Step 3 | |
attrName = [] | |
attrValue = [] | |
# Step 4 attribute name | |
while True: | |
if c == b"=" and attrName: | |
break | |
elif c in spaceCharactersBytes: | |
# Step 6! | |
c = data.skip() | |
break | |
elif c in (b"/", b">"): | |
return b"".join(attrName), b"" | |
elif c in asciiUppercaseBytes: | |
attrName.append(c.lower()) | |
elif c is None: | |
return None | |
else: | |
attrName.append(c) | |
# Step 5 | |
c = next(data) | |
# Step 7 | |
if c != b"=": | |
data.previous() | |
return b"".join(attrName), b"" | |
# Step 8 | |
next(data) | |
# Step 9 | |
c = data.skip() | |
# Step 10 | |
if c in (b"'", b'"'): | |
# 10.1 | |
quoteChar = c | |
while True: | |
# 10.2 | |
c = next(data) | |
# 10.3 | |
if c == quoteChar: | |
next(data) | |
return b"".join(attrName), b"".join(attrValue) | |
# 10.4 | |
elif c in asciiUppercaseBytes: | |
attrValue.append(c.lower()) | |
# 10.5 | |
else: | |
attrValue.append(c) | |
elif c == b">": | |
return b"".join(attrName), b"" | |
elif c in asciiUppercaseBytes: | |
attrValue.append(c.lower()) | |
elif c is None: | |
return None | |
else: | |
attrValue.append(c) | |
# Step 11 | |
while True: | |
c = next(data) | |
if c in spacesAngleBrackets: | |
return b"".join(attrName), b"".join(attrValue) | |
elif c in asciiUppercaseBytes: | |
attrValue.append(c.lower()) | |
elif c is None: | |
return None | |
else: | |
attrValue.append(c) | |
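# Illustrative sketch (added; not in the original source): EncodingParser only | |
# understands enough HTML to dig a charset out of the first chunk of bytes. | |
# The expected output assumes the vendored webencodings label table: | |
# >>> EncodingParser(b'<!--x--><META HTTP-EQUIV="Content-Type" ' | |
# ...                b'CONTENT="text/html; charset=iso-8859-1">').getEncoding().name | |
# 'windows-1252'                # the iso-8859-1 label maps to windows-1252 | |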
class ContentAttrParser(object): | |
def __init__(self, data): | |
assert isinstance(data, bytes) | |
self.data = data | |
def parse(self): | |
try: | |
# Check if the attr name is charset | |
# otherwise return | |
self.data.jumpTo(b"charset") | |
self.data.position += 1 | |
self.data.skip() | |
if not self.data.currentByte == b"=": | |
# If there is no = sign keep looking for attrs | |
return None | |
self.data.position += 1 | |
self.data.skip() | |
# Look for an encoding between matching quote marks | |
if self.data.currentByte in (b'"', b"'"): | |
quoteMark = self.data.currentByte | |
self.data.position += 1 | |
oldPosition = self.data.position | |
if self.data.jumpTo(quoteMark): | |
return self.data[oldPosition:self.data.position] | |
else: | |
return None | |
else: | |
# Unquoted value | |
oldPosition = self.data.position | |
try: | |
self.data.skipUntil(spaceCharactersBytes) | |
return self.data[oldPosition:self.data.position] | |
except StopIteration: | |
# Return the whole remaining value | |
return self.data[oldPosition:] | |
except StopIteration: | |
return None | |
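# Illustrative sketch (added; not in the original source): ContentAttrParser | |
# pulls the charset out of a content="..." value, quoted or unquoted, e.g. | |
# >>> ContentAttrParser(EncodingBytes(b"text/html; charset=utf-8")).parse() | |
# b'utf-8' | |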
def lookupEncoding(encoding): | |
"""Return the python codec name corresponding to an encoding or None if the | |
string doesn't correspond to a valid encoding.""" | |
if isinstance(encoding, binary_type): | |
try: | |
encoding = encoding.decode("ascii") | |
except UnicodeDecodeError: | |
return None | |
if encoding is not None: | |
try: | |
return webencodings.lookup(encoding) | |
except AttributeError: | |
return None | |
else: | |
return None |
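# Illustrative sketch (added; not in the original source): lookupEncoding | |
# normalises labels via the vendored webencodings package, e.g. | |
# >>> lookupEncoding("UTF8").name | |
# 'utf-8' | |
# >>> lookupEncoding(b"latin1").name       # WHATWG maps latin1 to windows-1252 | |
# 'windows-1252' | |
# >>> lookupEncoding("no-such-label") is None | |
# True | |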
from __future__ import absolute_import, division, unicode_literals | |
from pip._vendor.six import unichr as chr | |
from collections import deque | |
from .constants import spaceCharacters | |
from .constants import entities | |
from .constants import asciiLetters, asciiUpper2Lower | |
from .constants import digits, hexDigits, EOF | |
from .constants import tokenTypes, tagTokenTypes | |
from .constants import replacementCharacters | |
from ._inputstream import HTMLInputStream | |
from ._trie import Trie | |
entitiesTrie = Trie(entities) | |
class HTMLTokenizer(object): | |
""" This class takes care of tokenizing HTML. | |
* self.currentToken | |
Holds the token that is currently being processed. | |
* self.state | |
Holds a reference to the method to be invoked... XXX | |
* self.stream | |
Points to HTMLInputStream object. | |
""" | |
def __init__(self, stream, parser=None, **kwargs): | |
self.stream = HTMLInputStream(stream, **kwargs) | |
self.parser = parser | |
# Setup the initial tokenizer state | |
self.escapeFlag = False | |
self.lastFourChars = [] | |
self.state = self.dataState | |
self.escape = False | |
# The current token being created | |
self.currentToken = None | |
super(HTMLTokenizer, self).__init__() | |
def __iter__(self): | |
""" This is where the magic happens. | |
We do our usual processing through the states and when we have a token | |
to return we yield the token which pauses processing until the next token | |
is requested. | |
""" | |
self.tokenQueue = deque([]) | |
# Start processing. When EOF is reached self.state will return False | |
# instead of True and the loop will terminate. | |
while self.state(): | |
while self.stream.errors: | |
yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)} | |
while self.tokenQueue: | |
yield self.tokenQueue.popleft() | |
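# Illustrative usage sketch (added; not part of the original source). The | |
# import path assumes this vendored copy under pip._vendor, and token "type" | |
# values are the integers defined in tokenTypes: | |
# >>> from pip._vendor.html5lib._tokenizer import HTMLTokenizer | |
# >>> for token in HTMLTokenizer("<p class=a>hi</p>"): | |
# ...     print(token) | |
# ... # roughly: a StartTag for "p" carrying [["class", "a"]], Characters "hi", | |
# ... # and an EndTag for "p", plus a ParseError token for any error found. | |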
def consumeNumberEntity(self, isHex): | |
"""This function returns either U+FFFD or the character based on the | |
decimal or hexadecimal representation. It also discards ";" if present. | |
If it is not present, self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked. | |
""" | |
allowed = digits | |
radix = 10 | |
if isHex: | |
allowed = hexDigits | |
radix = 16 | |
charStack = [] | |
# Consume all the characters that are in range while making sure we | |
# don't hit an EOF. | |
c = self.stream.char() | |
while c in allowed and c is not EOF: | |
charStack.append(c) | |
c = self.stream.char() | |
# Convert the set of characters consumed to an int. | |
charAsInt = int("".join(charStack), radix) | |
# Certain characters get replaced with others | |
if charAsInt in replacementCharacters: | |
char = replacementCharacters[charAsInt] | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"illegal-codepoint-for-numeric-entity", | |
"datavars": {"charAsInt": charAsInt}}) | |
elif ((0xD800 <= charAsInt <= 0xDFFF) or | |
(charAsInt > 0x10FFFF)): | |
char = "\uFFFD" | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"illegal-codepoint-for-numeric-entity", | |
"datavars": {"charAsInt": charAsInt}}) | |
else: | |
# Should speed up this check somehow (e.g. move the set to a constant) | |
if ((0x0001 <= charAsInt <= 0x0008) or | |
(0x000E <= charAsInt <= 0x001F) or | |
(0x007F <= charAsInt <= 0x009F) or | |
(0xFDD0 <= charAsInt <= 0xFDEF) or | |
charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, | |
0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, | |
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, | |
0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, | |
0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, | |
0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, | |
0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, | |
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, | |
0xFFFFF, 0x10FFFE, 0x10FFFF])): | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": | |
"illegal-codepoint-for-numeric-entity", | |
"datavars": {"charAsInt": charAsInt}}) | |
try: | |
# Try/except needed as UCS-2 Python builds' unichr only works | |
# within the BMP. | |
char = chr(charAsInt) | |
except ValueError: | |
v = charAsInt - 0x10000 | |
char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF)) | |
# Discard the ; if present. Otherwise, put it back on the queue and | |
# invoke parseError on parser. | |
if c != ";": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"numeric-entity-without-semicolon"}) | |
self.stream.unget(c) | |
return char | |
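# Added commentary (not in the original source): for input "&#x26;" the data | |
# state hands off to consumeEntity, which sees "#" then "x", ungets the first | |
# hex digit and calls consumeNumberEntity(isHex=True); that reads "26", | |
# computes int("26", 16) == 0x26, returns chr(0x26) == "&", and consumes the | |
# trailing ";". | |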
def consumeEntity(self, allowedChar=None, fromAttribute=False): | |
# Initialise to the default output for when no entity is matched | |
output = "&" | |
charStack = [self.stream.char()] | |
if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or | |
(allowedChar is not None and allowedChar == charStack[0])): | |
self.stream.unget(charStack[0]) | |
elif charStack[0] == "#": | |
# Read the next character to see if it's hex or decimal | |
hex = False | |
charStack.append(self.stream.char()) | |
if charStack[-1] in ("x", "X"): | |
hex = True | |
charStack.append(self.stream.char()) | |
# charStack[-1] should be the first digit | |
if (hex and charStack[-1] in hexDigits) \ | |
or (not hex and charStack[-1] in digits): | |
# At least one digit found, so consume the whole number | |
self.stream.unget(charStack[-1]) | |
output = self.consumeNumberEntity(hex) | |
else: | |
# No digits found | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "expected-numeric-entity"}) | |
self.stream.unget(charStack.pop()) | |
output = "&" + "".join(charStack) | |
else: | |
# At this point we might have a named entity. Entities are stored | |
# in the global variable "entities". | |
# | |
# Consume characters and compare them to a substring of the | |
# entity names in the list until the substring no longer matches. | |
while (charStack[-1] is not EOF): | |
if not entitiesTrie.has_keys_with_prefix("".join(charStack)): | |
break | |
charStack.append(self.stream.char()) | |
# At this point we have a string that starts with some characters | |
# that may match an entity | |
# Try to find the longest entity the string will match to take care | |
# of &noti for instance. | |
try: | |
entityName = entitiesTrie.longest_prefix("".join(charStack[:-1])) | |
entityLength = len(entityName) | |
except KeyError: | |
entityName = None | |
if entityName is not None: | |
if entityName[-1] != ";": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"named-entity-without-semicolon"}) | |
if (entityName[-1] != ";" and fromAttribute and | |
(charStack[entityLength] in asciiLetters or | |
charStack[entityLength] in digits or | |
charStack[entityLength] == "=")): | |
self.stream.unget(charStack.pop()) | |
output = "&" + "".join(charStack) | |
else: | |
output = entities[entityName] | |
self.stream.unget(charStack.pop()) | |
output += "".join(charStack[entityLength:]) | |
else: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-named-entity"}) | |
self.stream.unget(charStack.pop()) | |
output = "&" + "".join(charStack) | |
if fromAttribute: | |
self.currentToken["data"][-1][1] += output | |
else: | |
if output in spaceCharacters: | |
tokenType = "SpaceCharacters" | |
else: | |
tokenType = "Characters" | |
self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output}) | |
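# Added commentary (not in the original source): for named references the trie | |
# is used to find the longest match, so "&amp;" resolves via entities["amp;"] | |
# to "&", while "&ampx" still matches the legacy "amp" entry but first queues a | |
# named-entity-without-semicolon ParseError and ungets the "x". | |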
def processEntityInAttribute(self, allowedChar): | |
"""This method replaces the need for "entityInAttributeValueState". | |
""" | |
self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) | |
def emitCurrentToken(self): | |
"""This method is a generic handler for emitting the tags. It also sets | |
the state to "data" because that's what's needed after a token has been | |
emitted. | |
""" | |
token = self.currentToken | |
# Add token to the queue to be yielded | |
if (token["type"] in tagTokenTypes): | |
token["name"] = token["name"].translate(asciiUpper2Lower) | |
if token["type"] == tokenTypes["EndTag"]: | |
if token["data"]: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "attributes-in-end-tag"}) | |
if token["selfClosing"]: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "self-closing-flag-on-end-tag"}) | |
self.tokenQueue.append(token) | |
self.state = self.dataState | |
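# Added commentary (not in the original source): tokens are plain dicts. A | |
# start tag built by tagOpenState and the attribute states below looks like | |
# {"type": tokenTypes["StartTag"], "name": "p", | |
#  "data": [["class", "a"]], "selfClosing": False, | |
#  "selfClosingAcknowledged": False} | |
# where "data" is still a list of [name, value] pairs at this stage; the tag | |
# name is lowercased via asciiUpper2Lower just before the token is queued. | |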
# Below are the various tokenizer states worked out. | |
def dataState(self): | |
data = self.stream.char() | |
if data == "&": | |
self.state = self.entityDataState | |
elif data == "<": | |
self.state = self.tagOpenState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "\u0000"}) | |
elif data is EOF: | |
# Tokenization ends. | |
return False | |
elif data in spaceCharacters: | |
# Directly after emitting a token you switch back to the "data | |
# state". At that point spaceCharacters are important so they are | |
# emitted separately. | |
self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": | |
data + self.stream.charsUntil(spaceCharacters, True)}) | |
# No need to update lastFourChars here, since the first space will | |
# have already been appended to lastFourChars and will have broken | |
# any <!-- or --> sequences | |
else: | |
chars = self.stream.charsUntil(("&", "<", "\u0000")) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | |
data + chars}) | |
return True | |
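# Added commentary (not in the original source): every *State method below | |
# follows the same contract as dataState above: read from self.stream, queue | |
# zero or more tokens on self.tokenQueue, assign the next state to self.state, | |
# and return True; only reaching EOF returns False, which ends the __iter__ | |
# loop. | |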
def entityDataState(self): | |
self.consumeEntity() | |
self.state = self.dataState | |
return True | |
def rcdataState(self): | |
data = self.stream.char() | |
if data == "&": | |
self.state = self.characterReferenceInRcdata | |
elif data == "<": | |
self.state = self.rcdataLessThanSignState | |
elif data == EOF: | |
# Tokenization ends. | |
return False | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "\uFFFD"}) | |
elif data in spaceCharacters: | |
# Directly after emitting a token you switch back to the "data | |
# state". At that point spaceCharacters are important so they are | |
# emitted separately. | |
self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": | |
data + self.stream.charsUntil(spaceCharacters, True)}) | |
# No need to update lastFourChars here, since the first space will | |
# have already been appended to lastFourChars and will have broken | |
# any <!-- or --> sequences | |
else: | |
chars = self.stream.charsUntil(("&", "<", "\u0000")) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | |
data + chars}) | |
return True | |
def characterReferenceInRcdata(self): | |
self.consumeEntity() | |
self.state = self.rcdataState | |
return True | |
def rawtextState(self): | |
data = self.stream.char() | |
if data == "<": | |
self.state = self.rawtextLessThanSignState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "\uFFFD"}) | |
elif data == EOF: | |
# Tokenization ends. | |
return False | |
else: | |
chars = self.stream.charsUntil(("<", "\u0000")) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | |
data + chars}) | |
return True | |
def scriptDataState(self): | |
data = self.stream.char() | |
if data == "<": | |
self.state = self.scriptDataLessThanSignState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "\uFFFD"}) | |
elif data == EOF: | |
# Tokenization ends. | |
return False | |
else: | |
chars = self.stream.charsUntil(("<", "\u0000")) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | |
data + chars}) | |
return True | |
def plaintextState(self): | |
data = self.stream.char() | |
if data == EOF: | |
# Tokenization ends. | |
return False | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "\uFFFD"}) | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | |
data + self.stream.charsUntil("\u0000")}) | |
return True | |
def tagOpenState(self): | |
data = self.stream.char() | |
if data == "!": | |
self.state = self.markupDeclarationOpenState | |
elif data == "/": | |
self.state = self.closeTagOpenState | |
elif data in asciiLetters: | |
self.currentToken = {"type": tokenTypes["StartTag"], | |
"name": data, "data": [], | |
"selfClosing": False, | |
"selfClosingAcknowledged": False} | |
self.state = self.tagNameState | |
elif data == ">": | |
# XXX In theory it could be something besides a tag name. But | |
# do we really care? | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-tag-name-but-got-right-bracket"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"}) | |
self.state = self.dataState | |
elif data == "?": | |
# XXX In theory it could be something besides a tag name. But | |
# do we really care? | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-tag-name-but-got-question-mark"}) | |
self.stream.unget(data) | |
self.state = self.bogusCommentState | |
else: | |
# XXX | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-tag-name"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | |
self.stream.unget(data) | |
self.state = self.dataState | |
return True | |
def closeTagOpenState(self): | |
data = self.stream.char() | |
if data in asciiLetters: | |
self.currentToken = {"type": tokenTypes["EndTag"], "name": data, | |
"data": [], "selfClosing": False} | |
self.state = self.tagNameState | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-closing-tag-but-got-right-bracket"}) | |
self.state = self.dataState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-closing-tag-but-got-eof"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | |
self.state = self.dataState | |
else: | |
# XXX data can be _'_... | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-closing-tag-but-got-char", | |
"datavars": {"data": data}}) | |
self.stream.unget(data) | |
self.state = self.bogusCommentState | |
return True | |
def tagNameState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
self.state = self.beforeAttributeNameState | |
elif data == ">": | |
self.emitCurrentToken() | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-tag-name"}) | |
self.state = self.dataState | |
elif data == "/": | |
self.state = self.selfClosingStartTagState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["name"] += "\uFFFD" | |
else: | |
self.currentToken["name"] += data | |
# (Don't use charsUntil here, because tag names are | |
# very short and it's faster to not do anything fancy) | |
return True | |
def rcdataLessThanSignState(self): | |
data = self.stream.char() | |
if data == "/": | |
self.temporaryBuffer = "" | |
self.state = self.rcdataEndTagOpenState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | |
self.stream.unget(data) | |
self.state = self.rcdataState | |
return True | |
def rcdataEndTagOpenState(self): | |
data = self.stream.char() | |
if data in asciiLetters: | |
self.temporaryBuffer += data | |
self.state = self.rcdataEndTagNameState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | |
self.stream.unget(data) | |
self.state = self.rcdataState | |
return True | |
def rcdataEndTagNameState(self): | |
appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | |
data = self.stream.char() | |
if data in spaceCharacters and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.state = self.beforeAttributeNameState | |
elif data == "/" and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.state = self.selfClosingStartTagState | |
elif data == ">" and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.emitCurrentToken() | |
self.state = self.dataState | |
elif data in asciiLetters: | |
self.temporaryBuffer += data | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "</" + self.temporaryBuffer}) | |
self.stream.unget(data) | |
self.state = self.rcdataState | |
return True | |
def rawtextLessThanSignState(self): | |
data = self.stream.char() | |
if data == "/": | |
self.temporaryBuffer = "" | |
self.state = self.rawtextEndTagOpenState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | |
self.stream.unget(data) | |
self.state = self.rawtextState | |
return True | |
def rawtextEndTagOpenState(self): | |
data = self.stream.char() | |
if data in asciiLetters: | |
self.temporaryBuffer += data | |
self.state = self.rawtextEndTagNameState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | |
self.stream.unget(data) | |
self.state = self.rawtextState | |
return True | |
def rawtextEndTagNameState(self): | |
appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | |
data = self.stream.char() | |
if data in spaceCharacters and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.state = self.beforeAttributeNameState | |
elif data == "/" and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.state = self.selfClosingStartTagState | |
elif data == ">" and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.emitCurrentToken() | |
self.state = self.dataState | |
elif data in asciiLetters: | |
self.temporaryBuffer += data | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "</" + self.temporaryBuffer}) | |
self.stream.unget(data) | |
self.state = self.rawtextState | |
return True | |
def scriptDataLessThanSignState(self): | |
data = self.stream.char() | |
if data == "/": | |
self.temporaryBuffer = "" | |
self.state = self.scriptDataEndTagOpenState | |
elif data == "!": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<!"}) | |
self.state = self.scriptDataEscapeStartState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | |
self.stream.unget(data) | |
self.state = self.scriptDataState | |
return True | |
def scriptDataEndTagOpenState(self): | |
data = self.stream.char() | |
if data in asciiLetters: | |
self.temporaryBuffer += data | |
self.state = self.scriptDataEndTagNameState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | |
self.stream.unget(data) | |
self.state = self.scriptDataState | |
return True | |
def scriptDataEndTagNameState(self): | |
appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | |
data = self.stream.char() | |
if data in spaceCharacters and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.state = self.beforeAttributeNameState | |
elif data == "/" and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.state = self.selfClosingStartTagState | |
elif data == ">" and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.emitCurrentToken() | |
self.state = self.dataState | |
elif data in asciiLetters: | |
self.temporaryBuffer += data | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "</" + self.temporaryBuffer}) | |
self.stream.unget(data) | |
self.state = self.scriptDataState | |
return True | |
def scriptDataEscapeStartState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | |
self.state = self.scriptDataEscapeStartDashState | |
else: | |
self.stream.unget(data) | |
self.state = self.scriptDataState | |
return True | |
def scriptDataEscapeStartDashState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | |
self.state = self.scriptDataEscapedDashDashState | |
else: | |
self.stream.unget(data) | |
self.state = self.scriptDataState | |
return True | |
def scriptDataEscapedState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | |
self.state = self.scriptDataEscapedDashState | |
elif data == "<": | |
self.state = self.scriptDataEscapedLessThanSignState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "\uFFFD"}) | |
elif data == EOF: | |
self.state = self.dataState | |
else: | |
chars = self.stream.charsUntil(("<", "-", "\u0000")) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": | |
data + chars}) | |
return True | |
def scriptDataEscapedDashState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | |
self.state = self.scriptDataEscapedDashDashState | |
elif data == "<": | |
self.state = self.scriptDataEscapedLessThanSignState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "\uFFFD"}) | |
self.state = self.scriptDataEscapedState | |
elif data == EOF: | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | |
self.state = self.scriptDataEscapedState | |
return True | |
def scriptDataEscapedDashDashState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | |
elif data == "<": | |
self.state = self.scriptDataEscapedLessThanSignState | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) | |
self.state = self.scriptDataState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "\uFFFD"}) | |
self.state = self.scriptDataEscapedState | |
elif data == EOF: | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | |
self.state = self.scriptDataEscapedState | |
return True | |
def scriptDataEscapedLessThanSignState(self): | |
data = self.stream.char() | |
if data == "/": | |
self.temporaryBuffer = "" | |
self.state = self.scriptDataEscapedEndTagOpenState | |
elif data in asciiLetters: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data}) | |
self.temporaryBuffer = data | |
self.state = self.scriptDataDoubleEscapeStartState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | |
self.stream.unget(data) | |
self.state = self.scriptDataEscapedState | |
return True | |
def scriptDataEscapedEndTagOpenState(self): | |
data = self.stream.char() | |
if data in asciiLetters: | |
self.temporaryBuffer = data | |
self.state = self.scriptDataEscapedEndTagNameState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) | |
self.stream.unget(data) | |
self.state = self.scriptDataEscapedState | |
return True | |
def scriptDataEscapedEndTagNameState(self): | |
appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() | |
data = self.stream.char() | |
if data in spaceCharacters and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.state = self.beforeAttributeNameState | |
elif data == "/" and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.state = self.selfClosingStartTagState | |
elif data == ">" and appropriate: | |
self.currentToken = {"type": tokenTypes["EndTag"], | |
"name": self.temporaryBuffer, | |
"data": [], "selfClosing": False} | |
self.emitCurrentToken() | |
self.state = self.dataState | |
elif data in asciiLetters: | |
self.temporaryBuffer += data | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "</" + self.temporaryBuffer}) | |
self.stream.unget(data) | |
self.state = self.scriptDataEscapedState | |
return True | |
def scriptDataDoubleEscapeStartState(self): | |
data = self.stream.char() | |
if data in (spaceCharacters | frozenset(("/", ">"))): | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | |
if self.temporaryBuffer.lower() == "script": | |
self.state = self.scriptDataDoubleEscapedState | |
else: | |
self.state = self.scriptDataEscapedState | |
elif data in asciiLetters: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | |
self.temporaryBuffer += data | |
else: | |
self.stream.unget(data) | |
self.state = self.scriptDataEscapedState | |
return True | |
def scriptDataDoubleEscapedState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | |
self.state = self.scriptDataDoubleEscapedDashState | |
elif data == "<": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | |
self.state = self.scriptDataDoubleEscapedLessThanSignState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "\uFFFD"}) | |
elif data == EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-script-in-script"}) | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | |
return True | |
def scriptDataDoubleEscapedDashState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | |
self.state = self.scriptDataDoubleEscapedDashDashState | |
elif data == "<": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | |
self.state = self.scriptDataDoubleEscapedLessThanSignState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "\uFFFD"}) | |
self.state = self.scriptDataDoubleEscapedState | |
elif data == EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-script-in-script"}) | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | |
self.state = self.scriptDataDoubleEscapedState | |
return True | |
def scriptDataDoubleEscapedDashDashState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) | |
elif data == "<": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) | |
self.state = self.scriptDataDoubleEscapedLessThanSignState | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) | |
self.state = self.scriptDataState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": "\uFFFD"}) | |
self.state = self.scriptDataDoubleEscapedState | |
elif data == EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-script-in-script"}) | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | |
self.state = self.scriptDataDoubleEscapedState | |
return True | |
def scriptDataDoubleEscapedLessThanSignState(self): | |
data = self.stream.char() | |
if data == "/": | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"}) | |
self.temporaryBuffer = "" | |
self.state = self.scriptDataDoubleEscapeEndState | |
else: | |
self.stream.unget(data) | |
self.state = self.scriptDataDoubleEscapedState | |
return True | |
def scriptDataDoubleEscapeEndState(self): | |
data = self.stream.char() | |
if data in (spaceCharacters | frozenset(("/", ">"))): | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | |
if self.temporaryBuffer.lower() == "script": | |
self.state = self.scriptDataEscapedState | |
else: | |
self.state = self.scriptDataDoubleEscapedState | |
elif data in asciiLetters: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) | |
self.temporaryBuffer += data | |
else: | |
self.stream.unget(data) | |
self.state = self.scriptDataDoubleEscapedState | |
return True | |
def beforeAttributeNameState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
self.stream.charsUntil(spaceCharacters, True) | |
elif data in asciiLetters: | |
self.currentToken["data"].append([data, ""]) | |
self.state = self.attributeNameState | |
elif data == ">": | |
self.emitCurrentToken() | |
elif data == "/": | |
self.state = self.selfClosingStartTagState | |
elif data in ("'", '"', "=", "<"): | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"invalid-character-in-attribute-name"}) | |
self.currentToken["data"].append([data, ""]) | |
self.state = self.attributeNameState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"].append(["\uFFFD", ""]) | |
self.state = self.attributeNameState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-attribute-name-but-got-eof"}) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"].append([data, ""]) | |
self.state = self.attributeNameState | |
return True | |
def attributeNameState(self): | |
data = self.stream.char() | |
leavingThisState = True | |
emitToken = False | |
if data == "=": | |
self.state = self.beforeAttributeValueState | |
elif data in asciiLetters: | |
self.currentToken["data"][-1][0] += data +\ | |
self.stream.charsUntil(asciiLetters, True) | |
leavingThisState = False | |
elif data == ">": | |
# XXX If we emit the token here, the attributes are converted to a dict | |
# without being checked, and when the code below runs we get an error | |
# because data is a dict, not a list | |
emitToken = True | |
elif data in spaceCharacters: | |
self.state = self.afterAttributeNameState | |
elif data == "/": | |
self.state = self.selfClosingStartTagState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"][-1][0] += "\uFFFD" | |
leavingThisState = False | |
elif data in ("'", '"', "<"): | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": | |
"invalid-character-in-attribute-name"}) | |
self.currentToken["data"][-1][0] += data | |
leavingThisState = False | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "eof-in-attribute-name"}) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"][-1][0] += data | |
leavingThisState = False | |
if leavingThisState: | |
# Attributes are not dropped at this stage. That happens when the | |
# start tag token is emitted so values can still be safely appended | |
# to attributes, but we do want to report the parse error in time. | |
self.currentToken["data"][-1][0] = ( | |
self.currentToken["data"][-1][0].translate(asciiUpper2Lower)) | |
for name, _ in self.currentToken["data"][:-1]: | |
if self.currentToken["data"][-1][0] == name: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"duplicate-attribute"}) | |
break | |
# XXX Fix for above XXX | |
if emitToken: | |
self.emitCurrentToken() | |
return True | |
def afterAttributeNameState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
self.stream.charsUntil(spaceCharacters, True) | |
elif data == "=": | |
self.state = self.beforeAttributeValueState | |
elif data == ">": | |
self.emitCurrentToken() | |
elif data in asciiLetters: | |
self.currentToken["data"].append([data, ""]) | |
self.state = self.attributeNameState | |
elif data == "/": | |
self.state = self.selfClosingStartTagState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"].append(["\uFFFD", ""]) | |
self.state = self.attributeNameState | |
elif data in ("'", '"', "<"): | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"invalid-character-after-attribute-name"}) | |
self.currentToken["data"].append([data, ""]) | |
self.state = self.attributeNameState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-end-of-tag-but-got-eof"}) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"].append([data, ""]) | |
self.state = self.attributeNameState | |
return True | |
def beforeAttributeValueState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
self.stream.charsUntil(spaceCharacters, True) | |
elif data == "\"": | |
self.state = self.attributeValueDoubleQuotedState | |
elif data == "&": | |
self.state = self.attributeValueUnQuotedState | |
self.stream.unget(data) | |
elif data == "'": | |
self.state = self.attributeValueSingleQuotedState | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-attribute-value-but-got-right-bracket"}) | |
self.emitCurrentToken() | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"][-1][1] += "\uFFFD" | |
self.state = self.attributeValueUnQuotedState | |
elif data in ("=", "<", "`"): | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"equals-in-unquoted-attribute-value"}) | |
self.currentToken["data"][-1][1] += data | |
self.state = self.attributeValueUnQuotedState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-attribute-value-but-got-eof"}) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"][-1][1] += data | |
self.state = self.attributeValueUnQuotedState | |
return True | |
def attributeValueDoubleQuotedState(self): | |
data = self.stream.char() | |
if data == "\"": | |
self.state = self.afterAttributeValueState | |
elif data == "&": | |
self.processEntityInAttribute('"') | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"][-1][1] += "\uFFFD" | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-attribute-value-double-quote"}) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"][-1][1] += data +\ | |
self.stream.charsUntil(("\"", "&", "\u0000")) | |
return True | |
def attributeValueSingleQuotedState(self): | |
data = self.stream.char() | |
if data == "'": | |
self.state = self.afterAttributeValueState | |
elif data == "&": | |
self.processEntityInAttribute("'") | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"][-1][1] += "\uFFFD" | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-attribute-value-single-quote"}) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"][-1][1] += data +\ | |
self.stream.charsUntil(("'", "&", "\u0000")) | |
return True | |
def attributeValueUnQuotedState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
self.state = self.beforeAttributeNameState | |
elif data == "&": | |
self.processEntityInAttribute(">") | |
elif data == ">": | |
self.emitCurrentToken() | |
elif data in ('"', "'", "=", "<", "`"): | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-character-in-unquoted-attribute-value"}) | |
self.currentToken["data"][-1][1] += data | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"][-1][1] += "\uFFFD" | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-attribute-value-no-quotes"}) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"][-1][1] += data + self.stream.charsUntil( | |
frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters) | |
return True | |
def afterAttributeValueState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
self.state = self.beforeAttributeNameState | |
elif data == ">": | |
self.emitCurrentToken() | |
elif data == "/": | |
self.state = self.selfClosingStartTagState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-EOF-after-attribute-value"}) | |
self.stream.unget(data) | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-character-after-attribute-value"}) | |
self.stream.unget(data) | |
self.state = self.beforeAttributeNameState | |
return True | |
def selfClosingStartTagState(self): | |
data = self.stream.char() | |
if data == ">": | |
self.currentToken["selfClosing"] = True | |
self.emitCurrentToken() | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": | |
"unexpected-EOF-after-solidus-in-tag"}) | |
self.stream.unget(data) | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-character-after-solidus-in-tag"}) | |
self.stream.unget(data) | |
self.state = self.beforeAttributeNameState | |
return True | |
def bogusCommentState(self): | |
# Make a new comment token and give it as value all the characters | |
# until the first > or EOF (charsUntil checks for EOF automatically) | |
# and emit it. | |
data = self.stream.charsUntil(">") | |
data = data.replace("\u0000", "\uFFFD") | |
self.tokenQueue.append( | |
{"type": tokenTypes["Comment"], "data": data}) | |
# Eat the character directly after the bogus comment which is either a | |
# ">" or an EOF. | |
self.stream.char() | |
self.state = self.dataState | |
return True | |
def markupDeclarationOpenState(self): | |
charStack = [self.stream.char()] | |
if charStack[-1] == "-": | |
charStack.append(self.stream.char()) | |
if charStack[-1] == "-": | |
self.currentToken = {"type": tokenTypes["Comment"], "data": ""} | |
self.state = self.commentStartState | |
return True | |
elif charStack[-1] in ('d', 'D'): | |
matched = True | |
for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'), | |
('y', 'Y'), ('p', 'P'), ('e', 'E')): | |
charStack.append(self.stream.char()) | |
if charStack[-1] not in expected: | |
matched = False | |
break | |
if matched: | |
self.currentToken = {"type": tokenTypes["Doctype"], | |
"name": "", | |
"publicId": None, "systemId": None, | |
"correct": True} | |
self.state = self.doctypeState | |
return True | |
elif (charStack[-1] == "[" and | |
self.parser is not None and | |
self.parser.tree.openElements and | |
self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace): | |
matched = True | |
for expected in ["C", "D", "A", "T", "A", "["]: | |
charStack.append(self.stream.char()) | |
if charStack[-1] != expected: | |
matched = False | |
break | |
if matched: | |
self.state = self.cdataSectionState | |
return True | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-dashes-or-doctype"}) | |
while charStack: | |
self.stream.unget(charStack.pop()) | |
self.state = self.bogusCommentState | |
return True | |
def commentStartState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.state = self.commentStartDashState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"] += "\uFFFD" | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"incorrect-comment"}) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-comment"}) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"] += data | |
self.state = self.commentState | |
return True | |
def commentStartDashState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.state = self.commentEndState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"] += "-\uFFFD" | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"incorrect-comment"}) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-comment"}) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"] += "-" + data | |
self.state = self.commentState | |
return True | |
def commentState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.state = self.commentEndDashState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"] += "\uFFFD" | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "eof-in-comment"}) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"] += data + \ | |
self.stream.charsUntil(("-", "\u0000")) | |
return True | |
def commentEndDashState(self): | |
data = self.stream.char() | |
if data == "-": | |
self.state = self.commentEndState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"] += "-\uFFFD" | |
self.state = self.commentState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-comment-end-dash"}) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"] += "-" + data | |
self.state = self.commentState | |
return True | |
def commentEndState(self): | |
data = self.stream.char() | |
if data == ">": | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"] += "--\uFFFD" | |
self.state = self.commentState | |
elif data == "!": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-bang-after-double-dash-in-comment"}) | |
self.state = self.commentEndBangState | |
elif data == "-": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-dash-after-double-dash-in-comment"}) | |
self.currentToken["data"] += data | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-comment-double-dash"}) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
# XXX | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-char-in-comment"}) | |
self.currentToken["data"] += "--" + data | |
self.state = self.commentState | |
return True | |
def commentEndBangState(self): | |
data = self.stream.char() | |
if data == ">": | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data == "-": | |
self.currentToken["data"] += "--!" | |
self.state = self.commentEndDashState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["data"] += "--!\uFFFD" | |
self.state = self.commentState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-comment-end-bang-state"}) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.currentToken["data"] += "--!" + data | |
self.state = self.commentState | |
return True | |
def doctypeState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
self.state = self.beforeDoctypeNameState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-doctype-name-but-got-eof"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"need-space-after-doctype"}) | |
self.stream.unget(data) | |
self.state = self.beforeDoctypeNameState | |
return True | |
def beforeDoctypeNameState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
pass | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-doctype-name-but-got-right-bracket"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["name"] = "\uFFFD" | |
self.state = self.doctypeNameState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-doctype-name-but-got-eof"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.currentToken["name"] = data | |
self.state = self.doctypeNameState | |
return True | |
def doctypeNameState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) | |
self.state = self.afterDoctypeNameState | |
elif data == ">": | |
self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["name"] += "\uFFFD" | |
self.state = self.doctypeNameState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype-name"}) | |
self.currentToken["correct"] = False | |
self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.currentToken["name"] += data | |
return True | |
def afterDoctypeNameState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
pass | |
elif data == ">": | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data is EOF: | |
self.currentToken["correct"] = False | |
self.stream.unget(data) | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
if data in ("p", "P"): | |
matched = True | |
for expected in (("u", "U"), ("b", "B"), ("l", "L"), | |
("i", "I"), ("c", "C")): | |
data = self.stream.char() | |
if data not in expected: | |
matched = False | |
break | |
if matched: | |
self.state = self.afterDoctypePublicKeywordState | |
return True | |
elif data in ("s", "S"): | |
matched = True | |
for expected in (("y", "Y"), ("s", "S"), ("t", "T"), | |
("e", "E"), ("m", "M")): | |
data = self.stream.char() | |
if data not in expected: | |
matched = False | |
break | |
if matched: | |
self.state = self.afterDoctypeSystemKeywordState | |
return True | |
# All the characters read before the current 'data' will be | |
# [a-zA-Z], so they're garbage in the bogus doctype and can be | |
# discarded; only the latest character might be '>' or EOF | |
# and needs to be ungetted | |
self.stream.unget(data) | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"expected-space-or-right-bracket-in-doctype", "datavars": | |
{"data": data}}) | |
self.currentToken["correct"] = False | |
self.state = self.bogusDoctypeState | |
return True | |
def afterDoctypePublicKeywordState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
self.state = self.beforeDoctypePublicIdentifierState | |
elif data in ("'", '"'): | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-char-in-doctype"}) | |
self.stream.unget(data) | |
self.state = self.beforeDoctypePublicIdentifierState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.stream.unget(data) | |
self.state = self.beforeDoctypePublicIdentifierState | |
return True | |
def beforeDoctypePublicIdentifierState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
pass | |
elif data == "\"": | |
self.currentToken["publicId"] = "" | |
self.state = self.doctypePublicIdentifierDoubleQuotedState | |
elif data == "'": | |
self.currentToken["publicId"] = "" | |
self.state = self.doctypePublicIdentifierSingleQuotedState | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-end-of-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-char-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.state = self.bogusDoctypeState | |
return True | |
def doctypePublicIdentifierDoubleQuotedState(self): | |
data = self.stream.char() | |
if data == "\"": | |
self.state = self.afterDoctypePublicIdentifierState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["publicId"] += "\uFFFD" | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-end-of-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.currentToken["publicId"] += data | |
return True | |
def doctypePublicIdentifierSingleQuotedState(self): | |
data = self.stream.char() | |
if data == "'": | |
self.state = self.afterDoctypePublicIdentifierState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["publicId"] += "\uFFFD" | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-end-of-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.currentToken["publicId"] += data | |
return True | |
def afterDoctypePublicIdentifierState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
self.state = self.betweenDoctypePublicAndSystemIdentifiersState | |
elif data == ">": | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data == '"': | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-char-in-doctype"}) | |
self.currentToken["systemId"] = "" | |
self.state = self.doctypeSystemIdentifierDoubleQuotedState | |
elif data == "'": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-char-in-doctype"}) | |
self.currentToken["systemId"] = "" | |
self.state = self.doctypeSystemIdentifierSingleQuotedState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-char-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.state = self.bogusDoctypeState | |
return True | |
def betweenDoctypePublicAndSystemIdentifiersState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
pass | |
elif data == ">": | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data == '"': | |
self.currentToken["systemId"] = "" | |
self.state = self.doctypeSystemIdentifierDoubleQuotedState | |
elif data == "'": | |
self.currentToken["systemId"] = "" | |
self.state = self.doctypeSystemIdentifierSingleQuotedState | |
        elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-char-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.state = self.bogusDoctypeState | |
return True | |
def afterDoctypeSystemKeywordState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
self.state = self.beforeDoctypeSystemIdentifierState | |
elif data in ("'", '"'): | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-char-in-doctype"}) | |
self.stream.unget(data) | |
self.state = self.beforeDoctypeSystemIdentifierState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.stream.unget(data) | |
self.state = self.beforeDoctypeSystemIdentifierState | |
return True | |
def beforeDoctypeSystemIdentifierState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
pass | |
elif data == "\"": | |
self.currentToken["systemId"] = "" | |
self.state = self.doctypeSystemIdentifierDoubleQuotedState | |
elif data == "'": | |
self.currentToken["systemId"] = "" | |
self.state = self.doctypeSystemIdentifierSingleQuotedState | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-char-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-char-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.state = self.bogusDoctypeState | |
return True | |
def doctypeSystemIdentifierDoubleQuotedState(self): | |
data = self.stream.char() | |
if data == "\"": | |
self.state = self.afterDoctypeSystemIdentifierState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["systemId"] += "\uFFFD" | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-end-of-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.currentToken["systemId"] += data | |
return True | |
def doctypeSystemIdentifierSingleQuotedState(self): | |
data = self.stream.char() | |
if data == "'": | |
self.state = self.afterDoctypeSystemIdentifierState | |
elif data == "\u0000": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
self.currentToken["systemId"] += "\uFFFD" | |
elif data == ">": | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-end-of-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.currentToken["systemId"] += data | |
return True | |
def afterDoctypeSystemIdentifierState(self): | |
data = self.stream.char() | |
if data in spaceCharacters: | |
pass | |
elif data == ">": | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data is EOF: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"eof-in-doctype"}) | |
self.currentToken["correct"] = False | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": | |
"unexpected-char-in-doctype"}) | |
self.state = self.bogusDoctypeState | |
return True | |
def bogusDoctypeState(self): | |
data = self.stream.char() | |
if data == ">": | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
elif data is EOF: | |
# XXX EMIT | |
self.stream.unget(data) | |
self.tokenQueue.append(self.currentToken) | |
self.state = self.dataState | |
else: | |
pass | |
return True | |
def cdataSectionState(self): | |
data = [] | |
while True: | |
data.append(self.stream.charsUntil("]")) | |
data.append(self.stream.charsUntil(">")) | |
char = self.stream.char() | |
            if char is EOF:
break | |
else: | |
assert char == ">" | |
if data[-1][-2:] == "]]": | |
data[-1] = data[-1][:-2] | |
break | |
else: | |
data.append(char) | |
data = "".join(data) # pylint:disable=redefined-variable-type | |
# Deal with null here rather than in the parser | |
nullCount = data.count("\u0000") | |
if nullCount > 0: | |
for _ in range(nullCount): | |
self.tokenQueue.append({"type": tokenTypes["ParseError"], | |
"data": "invalid-codepoint"}) | |
data = data.replace("\u0000", "\uFFFD") | |
if data: | |
self.tokenQueue.append({"type": tokenTypes["Characters"], | |
"data": data}) | |
self.state = self.dataState | |
return True |
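# The methods above all follow one pattern: each *State method reads from the
# stream, may append tokens to self.tokenQueue, switches self.state, and
# returns True so the caller keeps pumping states. A minimal standalone sketch
# of that driving loop (illustrative only, not html5lib's API; all names here
# are made up):
class _TinyTokenizer(object):
    def __init__(self, text):
        self._chars = iter(text)
        self.tokenQueue = []
        self.state = self.dataState

    def _char(self):
        return next(self._chars, None)   # None stands in for EOF

    def dataState(self):
        data = self._char()
        if data is None:
            return False                 # stop the pump at end of input
        if data == "<":
            self.state = self.tagOpenState
        else:
            self.tokenQueue.append(("Characters", data))
        return True

    def tagOpenState(self):
        self.tokenQueue.append(("TagChar", self._char()))
        self.state = self.dataState
        return True

    def run(self):
        while self.state():
            pass
        return self.tokenQueue

assert _TinyTokenizer("a<b").run() == [("Characters", "a"), ("TagChar", "b")]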
from __future__ import absolute_import, division, unicode_literals | |
from .py import Trie as PyTrie | |
Trie = PyTrie | |
# pylint:disable=wrong-import-position | |
try: | |
from .datrie import Trie as DATrie | |
except ImportError: | |
pass | |
else: | |
Trie = DATrie | |
# pylint:enable=wrong-import-position |
from __future__ import absolute_import, division, unicode_literals | |
try:
    from collections.abc import Mapping
except ImportError:  # Python 2
    from collections import Mapping
class Trie(Mapping): | |
"""Abstract base class for tries""" | |
def keys(self, prefix=None): | |
# pylint:disable=arguments-differ | |
keys = super(Trie, self).keys() | |
if prefix is None: | |
return set(keys) | |
# Python 2.6: no set comprehensions | |
return set([x for x in keys if x.startswith(prefix)]) | |
def has_keys_with_prefix(self, prefix): | |
for key in self.keys(): | |
if key.startswith(prefix): | |
return True | |
return False | |
def longest_prefix(self, prefix): | |
if prefix in self: | |
return prefix | |
for i in range(1, len(prefix) + 1): | |
if prefix[:-i] in self: | |
return prefix[:-i] | |
raise KeyError(prefix) | |
def longest_prefix_item(self, prefix): | |
lprefix = self.longest_prefix(prefix) | |
return (lprefix, self[lprefix]) |
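# A small self-contained check of the longest_prefix logic above, using a
# plain dict in place of a Trie (the data is made up, for illustration only):
# the lookup tries the full string first, then successively shorter prefixes.
def _longest_prefix_demo(mapping, prefix):
    if prefix in mapping:
        return prefix
    for i in range(1, len(prefix) + 1):
        if prefix[:-i] in mapping:
            return prefix[:-i]
    raise KeyError(prefix)

_demo_entities = {"am": 1, "amp": 2, "ampere": 3}
assert _longest_prefix_demo(_demo_entities, "ampli") == "amp"      # tries "ampli", "ampl", "amp"
assert _longest_prefix_demo(_demo_entities, "ampere;") == "ampere"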
from __future__ import absolute_import, division, unicode_literals | |
from datrie import Trie as DATrie | |
from pip._vendor.six import text_type | |
from ._base import Trie as ABCTrie | |
class Trie(ABCTrie): | |
def __init__(self, data): | |
chars = set() | |
for key in data.keys(): | |
if not isinstance(key, text_type): | |
raise TypeError("All keys must be strings") | |
for char in key: | |
chars.add(char) | |
self._data = DATrie("".join(chars)) | |
for key, value in data.items(): | |
self._data[key] = value | |
def __contains__(self, key): | |
return key in self._data | |
def __len__(self): | |
return len(self._data) | |
def __iter__(self): | |
raise NotImplementedError() | |
def __getitem__(self, key): | |
return self._data[key] | |
def keys(self, prefix=None): | |
return self._data.keys(prefix) | |
def has_keys_with_prefix(self, prefix): | |
return self._data.has_keys_with_prefix(prefix) | |
def longest_prefix(self, prefix): | |
return self._data.longest_prefix(prefix) | |
def longest_prefix_item(self, prefix): | |
return self._data.longest_prefix_item(prefix) |
from __future__ import absolute_import, division, unicode_literals | |
from pip._vendor.six import text_type | |
from bisect import bisect_left | |
from ._base import Trie as ABCTrie | |
class Trie(ABCTrie): | |
def __init__(self, data): | |
if not all(isinstance(x, text_type) for x in data.keys()): | |
raise TypeError("All keys must be strings") | |
self._data = data | |
self._keys = sorted(data.keys()) | |
self._cachestr = "" | |
self._cachepoints = (0, len(data)) | |
def __contains__(self, key): | |
return key in self._data | |
def __len__(self): | |
return len(self._data) | |
def __iter__(self): | |
return iter(self._data) | |
def __getitem__(self, key): | |
return self._data[key] | |
def keys(self, prefix=None): | |
if prefix is None or prefix == "" or not self._keys: | |
return set(self._keys) | |
if prefix.startswith(self._cachestr): | |
lo, hi = self._cachepoints | |
start = i = bisect_left(self._keys, prefix, lo, hi) | |
else: | |
start = i = bisect_left(self._keys, prefix) | |
keys = set() | |
if start == len(self._keys): | |
return keys | |
        # Bounds-check i so a prefix matching every remaining key cannot
        # index past the end of self._keys.
        while i < len(self._keys) and self._keys[i].startswith(prefix):
keys.add(self._keys[i]) | |
i += 1 | |
self._cachestr = prefix | |
self._cachepoints = (start, i) | |
return keys | |
def has_keys_with_prefix(self, prefix): | |
if prefix in self._data: | |
return True | |
if prefix.startswith(self._cachestr): | |
lo, hi = self._cachepoints | |
i = bisect_left(self._keys, prefix, lo, hi) | |
else: | |
i = bisect_left(self._keys, prefix) | |
if i == len(self._keys): | |
return False | |
return self._keys[i].startswith(prefix) |
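# Illustrative sketch (made-up keys) of the bisect-based prefix lookup the
# Trie above implements: with the key list kept sorted, bisect_left (imported
# at the top of this module) finds the first candidate, and a short scan
# collects everything sharing the prefix without touching the rest of the keys.
_demo_keys = sorted(["amp;", "ampere;", "and;", "angle;"])
_demo_prefix = "am"
_i = bisect_left(_demo_keys, _demo_prefix)
_matches = []
while _i < len(_demo_keys) and _demo_keys[_i].startswith(_demo_prefix):
    _matches.append(_demo_keys[_i])
    _i += 1
assert _matches == ["amp;", "ampere;"]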
from __future__ import absolute_import, division, unicode_literals | |
import sys | |
from types import ModuleType | |
from pip._vendor.six import text_type | |
try: | |
import xml.etree.cElementTree as default_etree | |
except ImportError: | |
import xml.etree.ElementTree as default_etree | |
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", | |
"surrogatePairToCodepoint", "moduleFactoryFactory", | |
"supports_lone_surrogates", "PY27"] | |
PY27 = sys.version_info[0] == 2 and sys.version_info[1] >= 7 | |
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be | |
# caught by the below test. In general this would be any platform | |
# using UTF-16 as its encoding of unicode strings, such as | |
# Jython. This is because UTF-16 itself is based on the use of such | |
# surrogates, and there is no mechanism to further escape such
# surrogates.
try: | |
_x = eval('"\\uD800"') # pylint:disable=eval-used | |
if not isinstance(_x, text_type): | |
# We need this with u"" because of http://bugs.jython.org/issue2039 | |
_x = eval('u"\\uD800"') # pylint:disable=eval-used | |
assert isinstance(_x, text_type) | |
except: # pylint:disable=bare-except | |
supports_lone_surrogates = False | |
else: | |
supports_lone_surrogates = True | |
class MethodDispatcher(dict): | |
"""Dict with 2 special properties: | |
On initiation, keys that are lists, sets or tuples are converted to | |
multiple keys so accessing any one of the items in the original | |
list-like object returns the matching value | |
md = MethodDispatcher({("foo", "bar"):"baz"}) | |
md["foo"] == "baz" | |
A default value which can be set through the default attribute. | |
""" | |
def __init__(self, items=()): | |
# Using _dictEntries instead of directly assigning to self is about | |
# twice as fast. Please do careful performance testing before changing | |
# anything here. | |
_dictEntries = [] | |
for name, value in items: | |
if isinstance(name, (list, tuple, frozenset, set)): | |
for item in name: | |
_dictEntries.append((item, value)) | |
else: | |
_dictEntries.append((name, value)) | |
dict.__init__(self, _dictEntries) | |
assert len(self) == len(_dictEntries) | |
self.default = None | |
def __getitem__(self, key): | |
return dict.get(self, key, self.default) | |
# Some utility functions to deal with weirdness around UCS2 vs UCS4 | |
# python builds | |
def isSurrogatePair(data): | |
return (len(data) == 2 and | |
ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and | |
ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF) | |
def surrogatePairToCodepoint(data): | |
char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 + | |
(ord(data[1]) - 0xDC00)) | |
return char_val | |
# Module Factory Factory (no, this isn't Java, I know) | |
# Here to stop this being duplicated all over the place. | |
def moduleFactoryFactory(factory): | |
moduleCache = {} | |
def moduleFactory(baseModule, *args, **kwargs): | |
if isinstance(ModuleType.__name__, type("")): | |
name = "_%s_factory" % baseModule.__name__ | |
else: | |
name = b"_%s_factory" % baseModule.__name__ | |
kwargs_tuple = tuple(kwargs.items()) | |
try: | |
return moduleCache[name][args][kwargs_tuple] | |
except KeyError: | |
mod = ModuleType(name) | |
objs = factory(baseModule, *args, **kwargs) | |
mod.__dict__.update(objs) | |
if "name" not in moduleCache: | |
moduleCache[name] = {} | |
if "args" not in moduleCache[name]: | |
moduleCache[name][args] = {} | |
if "kwargs" not in moduleCache[name][args]: | |
moduleCache[name][args][kwargs_tuple] = {} | |
moduleCache[name][args][kwargs_tuple] = mod | |
return mod | |
return moduleFactory | |
def memoize(func): | |
cache = {} | |
def wrapped(*args, **kwargs): | |
key = (tuple(args), tuple(kwargs.items())) | |
if key not in cache: | |
cache[key] = func(*args, **kwargs) | |
return cache[key] | |
return wrapped |
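# Hedged usage sketch for MethodDispatcher (the handler names below are made
# up, not html5lib's): list/tuple keys fan out to a single value, and missing
# keys fall back to the `default` attribute set after construction.
def _demo_heading(tag):
    return "heading:" + tag

def _demo_other(tag):
    return "other:" + tag

_demo_dispatch = MethodDispatcher([(("h1", "h2", "h3"), _demo_heading)])
_demo_dispatch.default = _demo_other
assert _demo_dispatch["h2"]("h2") == "heading:h2"
assert _demo_dispatch["p"]("p") == "other:p"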
from __future__ import absolute_import, division, unicode_literals | |
import string | |
EOF = None | |
E = { | |
"null-character": | |
"Null character in input stream, replaced with U+FFFD.", | |
"invalid-codepoint": | |
"Invalid codepoint in stream.", | |
"incorrectly-placed-solidus": | |
"Solidus (/) incorrectly placed in tag.", | |
"incorrect-cr-newline-entity": | |
"Incorrect CR newline entity, replaced with LF.", | |
"illegal-windows-1252-entity": | |
"Entity used with illegal number (windows-1252 reference).", | |
"cant-convert-numeric-entity": | |
"Numeric entity couldn't be converted to character " | |
"(codepoint U+%(charAsInt)08x).", | |
"illegal-codepoint-for-numeric-entity": | |
"Numeric entity represents an illegal codepoint: " | |
"U+%(charAsInt)08x.", | |
"numeric-entity-without-semicolon": | |
"Numeric entity didn't end with ';'.", | |
"expected-numeric-entity-but-got-eof": | |
"Numeric entity expected. Got end of file instead.", | |
"expected-numeric-entity": | |
"Numeric entity expected but none found.", | |
"named-entity-without-semicolon": | |
"Named entity didn't end with ';'.", | |
"expected-named-entity": | |
"Named entity expected. Got none.", | |
"attributes-in-end-tag": | |
"End tag contains unexpected attributes.", | |
'self-closing-flag-on-end-tag': | |
"End tag contains unexpected self-closing flag.", | |
"expected-tag-name-but-got-right-bracket": | |
"Expected tag name. Got '>' instead.", | |
"expected-tag-name-but-got-question-mark": | |
"Expected tag name. Got '?' instead. (HTML doesn't " | |
"support processing instructions.)", | |
"expected-tag-name": | |
"Expected tag name. Got something else instead", | |
"expected-closing-tag-but-got-right-bracket": | |
"Expected closing tag. Got '>' instead. Ignoring '</>'.", | |
"expected-closing-tag-but-got-eof": | |
"Expected closing tag. Unexpected end of file.", | |
"expected-closing-tag-but-got-char": | |
"Expected closing tag. Unexpected character '%(data)s' found.", | |
"eof-in-tag-name": | |
"Unexpected end of file in the tag name.", | |
"expected-attribute-name-but-got-eof": | |
"Unexpected end of file. Expected attribute name instead.", | |
"eof-in-attribute-name": | |
"Unexpected end of file in attribute name.", | |
"invalid-character-in-attribute-name": | |
"Invalid character in attribute name", | |
"duplicate-attribute": | |
"Dropped duplicate attribute on tag.", | |
"expected-end-of-tag-name-but-got-eof": | |
"Unexpected end of file. Expected = or end of tag.", | |
"expected-attribute-value-but-got-eof": | |
"Unexpected end of file. Expected attribute value.", | |
"expected-attribute-value-but-got-right-bracket": | |
"Expected attribute value. Got '>' instead.", | |
'equals-in-unquoted-attribute-value': | |
"Unexpected = in unquoted attribute", | |
'unexpected-character-in-unquoted-attribute-value': | |
"Unexpected character in unquoted attribute", | |
"invalid-character-after-attribute-name": | |
"Unexpected character after attribute name.", | |
"unexpected-character-after-attribute-value": | |
"Unexpected character after attribute value.", | |
"eof-in-attribute-value-double-quote": | |
"Unexpected end of file in attribute value (\").", | |
"eof-in-attribute-value-single-quote": | |
"Unexpected end of file in attribute value (').", | |
"eof-in-attribute-value-no-quotes": | |
"Unexpected end of file in attribute value.", | |
"unexpected-EOF-after-solidus-in-tag": | |
"Unexpected end of file in tag. Expected >", | |
"unexpected-character-after-solidus-in-tag": | |
"Unexpected character after / in tag. Expected >", | |
"expected-dashes-or-doctype": | |
"Expected '--' or 'DOCTYPE'. Not found.", | |
"unexpected-bang-after-double-dash-in-comment": | |
"Unexpected ! after -- in comment", | |
"unexpected-space-after-double-dash-in-comment": | |
"Unexpected space after -- in comment", | |
"incorrect-comment": | |
"Incorrect comment.", | |
"eof-in-comment": | |
"Unexpected end of file in comment.", | |
"eof-in-comment-end-dash": | |
"Unexpected end of file in comment (-)", | |
"unexpected-dash-after-double-dash-in-comment": | |
"Unexpected '-' after '--' found in comment.", | |
"eof-in-comment-double-dash": | |
"Unexpected end of file in comment (--).", | |
"eof-in-comment-end-space-state": | |
"Unexpected end of file in comment.", | |
"eof-in-comment-end-bang-state": | |
"Unexpected end of file in comment.", | |
"unexpected-char-in-comment": | |
"Unexpected character in comment found.", | |
"need-space-after-doctype": | |
"No space after literal string 'DOCTYPE'.", | |
"expected-doctype-name-but-got-right-bracket": | |
"Unexpected > character. Expected DOCTYPE name.", | |
"expected-doctype-name-but-got-eof": | |
"Unexpected end of file. Expected DOCTYPE name.", | |
"eof-in-doctype-name": | |
"Unexpected end of file in DOCTYPE name.", | |
"eof-in-doctype": | |
"Unexpected end of file in DOCTYPE.", | |
"expected-space-or-right-bracket-in-doctype": | |
"Expected space or '>'. Got '%(data)s'", | |
"unexpected-end-of-doctype": | |
"Unexpected end of DOCTYPE.", | |
"unexpected-char-in-doctype": | |
"Unexpected character in DOCTYPE.", | |
"eof-in-innerhtml": | |
"XXX innerHTML EOF", | |
"unexpected-doctype": | |
"Unexpected DOCTYPE. Ignored.", | |
"non-html-root": | |
"html needs to be the first start tag.", | |
"expected-doctype-but-got-eof": | |
"Unexpected End of file. Expected DOCTYPE.", | |
"unknown-doctype": | |
"Erroneous DOCTYPE.", | |
"expected-doctype-but-got-chars": | |
"Unexpected non-space characters. Expected DOCTYPE.", | |
"expected-doctype-but-got-start-tag": | |
"Unexpected start tag (%(name)s). Expected DOCTYPE.", | |
"expected-doctype-but-got-end-tag": | |
"Unexpected end tag (%(name)s). Expected DOCTYPE.", | |
"end-tag-after-implied-root": | |
"Unexpected end tag (%(name)s) after the (implied) root element.", | |
"expected-named-closing-tag-but-got-eof": | |
"Unexpected end of file. Expected end tag (%(name)s).", | |
"two-heads-are-not-better-than-one": | |
"Unexpected start tag head in existing head. Ignored.", | |
"unexpected-end-tag": | |
"Unexpected end tag (%(name)s). Ignored.", | |
"unexpected-start-tag-out-of-my-head": | |
"Unexpected start tag (%(name)s) that can be in head. Moved.", | |
"unexpected-start-tag": | |
"Unexpected start tag (%(name)s).", | |
"missing-end-tag": | |
"Missing end tag (%(name)s).", | |
"missing-end-tags": | |
"Missing end tags (%(name)s).", | |
"unexpected-start-tag-implies-end-tag": | |
"Unexpected start tag (%(startName)s) " | |
"implies end tag (%(endName)s).", | |
"unexpected-start-tag-treated-as": | |
"Unexpected start tag (%(originalName)s). Treated as %(newName)s.", | |
"deprecated-tag": | |
"Unexpected start tag %(name)s. Don't use it!", | |
"unexpected-start-tag-ignored": | |
"Unexpected start tag %(name)s. Ignored.", | |
"expected-one-end-tag-but-got-another": | |
"Unexpected end tag (%(gotName)s). " | |
"Missing end tag (%(expectedName)s).", | |
"end-tag-too-early": | |
"End tag (%(name)s) seen too early. Expected other end tag.", | |
"end-tag-too-early-named": | |
"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", | |
"end-tag-too-early-ignored": | |
"End tag (%(name)s) seen too early. Ignored.", | |
"adoption-agency-1.1": | |
"End tag (%(name)s) violates step 1, " | |
"paragraph 1 of the adoption agency algorithm.", | |
"adoption-agency-1.2": | |
"End tag (%(name)s) violates step 1, " | |
"paragraph 2 of the adoption agency algorithm.", | |
"adoption-agency-1.3": | |
"End tag (%(name)s) violates step 1, " | |
"paragraph 3 of the adoption agency algorithm.", | |
"adoption-agency-4.4": | |
"End tag (%(name)s) violates step 4, " | |
"paragraph 4 of the adoption agency algorithm.", | |
"unexpected-end-tag-treated-as": | |
"Unexpected end tag (%(originalName)s). Treated as %(newName)s.", | |
"no-end-tag": | |
"This element (%(name)s) has no end tag.", | |
"unexpected-implied-end-tag-in-table": | |
"Unexpected implied end tag (%(name)s) in the table phase.", | |
"unexpected-implied-end-tag-in-table-body": | |
"Unexpected implied end tag (%(name)s) in the table body phase.", | |
"unexpected-char-implies-table-voodoo": | |
"Unexpected non-space characters in " | |
"table context caused voodoo mode.", | |
"unexpected-hidden-input-in-table": | |
"Unexpected input with type hidden in table context.", | |
"unexpected-form-in-table": | |
"Unexpected form in table context.", | |
"unexpected-start-tag-implies-table-voodoo": | |
"Unexpected start tag (%(name)s) in " | |
"table context caused voodoo mode.", | |
"unexpected-end-tag-implies-table-voodoo": | |
"Unexpected end tag (%(name)s) in " | |
"table context caused voodoo mode.", | |
"unexpected-cell-in-table-body": | |
"Unexpected table cell start tag (%(name)s) " | |
"in the table body phase.", | |
"unexpected-cell-end-tag": | |
"Got table cell end tag (%(name)s) " | |
"while required end tags are missing.", | |
"unexpected-end-tag-in-table-body": | |
"Unexpected end tag (%(name)s) in the table body phase. Ignored.", | |
"unexpected-implied-end-tag-in-table-row": | |
"Unexpected implied end tag (%(name)s) in the table row phase.", | |
"unexpected-end-tag-in-table-row": | |
"Unexpected end tag (%(name)s) in the table row phase. Ignored.", | |
"unexpected-select-in-select": | |
"Unexpected select start tag in the select phase " | |
"treated as select end tag.", | |
"unexpected-input-in-select": | |
"Unexpected input start tag in the select phase.", | |
"unexpected-start-tag-in-select": | |
"Unexpected start tag token (%(name)s in the select phase. " | |
"Ignored.", | |
"unexpected-end-tag-in-select": | |
"Unexpected end tag (%(name)s) in the select phase. Ignored.", | |
"unexpected-table-element-start-tag-in-select-in-table": | |
"Unexpected table element start tag (%(name)s) in the select in table phase.", | |
"unexpected-table-element-end-tag-in-select-in-table": | |
"Unexpected table element end tag (%(name)s) in the select in table phase.", | |
"unexpected-char-after-body": | |
"Unexpected non-space characters in the after body phase.", | |
"unexpected-start-tag-after-body": | |
"Unexpected start tag token (%(name)s)" | |
" in the after body phase.", | |
"unexpected-end-tag-after-body": | |
"Unexpected end tag token (%(name)s)" | |
" in the after body phase.", | |
"unexpected-char-in-frameset": | |
"Unexpected characters in the frameset phase. Characters ignored.", | |
"unexpected-start-tag-in-frameset": | |
"Unexpected start tag token (%(name)s)" | |
" in the frameset phase. Ignored.", | |
"unexpected-frameset-in-frameset-innerhtml": | |
"Unexpected end tag token (frameset) " | |
"in the frameset phase (innerHTML).", | |
"unexpected-end-tag-in-frameset": | |
"Unexpected end tag token (%(name)s)" | |
" in the frameset phase. Ignored.", | |
"unexpected-char-after-frameset": | |
"Unexpected non-space characters in the " | |
"after frameset phase. Ignored.", | |
"unexpected-start-tag-after-frameset": | |
"Unexpected start tag (%(name)s)" | |
" in the after frameset phase. Ignored.", | |
"unexpected-end-tag-after-frameset": | |
"Unexpected end tag (%(name)s)" | |
" in the after frameset phase. Ignored.", | |
"unexpected-end-tag-after-body-innerhtml": | |
"Unexpected end tag after body(innerHtml)", | |
"expected-eof-but-got-char": | |
"Unexpected non-space characters. Expected end of file.", | |
"expected-eof-but-got-start-tag": | |
"Unexpected start tag (%(name)s)" | |
". Expected end of file.", | |
"expected-eof-but-got-end-tag": | |
"Unexpected end tag (%(name)s)" | |
". Expected end of file.", | |
"eof-in-table": | |
"Unexpected end of file. Expected table content.", | |
"eof-in-select": | |
"Unexpected end of file. Expected select content.", | |
"eof-in-frameset": | |
"Unexpected end of file. Expected frameset content.", | |
"eof-in-script-in-script": | |
"Unexpected end of file. Expected script content.", | |
"eof-in-foreign-lands": | |
"Unexpected end of file. Expected foreign content", | |
"non-void-element-with-trailing-solidus": | |
"Trailing solidus not allowed on element %(name)s", | |
"unexpected-html-element-in-foreign-content": | |
"Element %(name)s not allowed in a non-html context", | |
"unexpected-end-tag-before-html": | |
"Unexpected end tag (%(name)s) before html.", | |
"unexpected-inhead-noscript-tag": | |
"Element %(name)s not allowed in a inhead-noscript context", | |
"eof-in-head-noscript": | |
"Unexpected end of file. Expected inhead-noscript content", | |
"char-in-head-noscript": | |
"Unexpected non-space character. Expected inhead-noscript content", | |
"XXX-undefined-error": | |
"Undefined error (this sucks and should be fixed)", | |
} | |
namespaces = { | |
"html": "http://www.w3.org/1999/xhtml", | |
"mathml": "http://www.w3.org/1998/Math/MathML", | |
"svg": "http://www.w3.org/2000/svg", | |
"xlink": "http://www.w3.org/1999/xlink", | |
"xml": "http://www.w3.org/XML/1998/namespace", | |
"xmlns": "http://www.w3.org/2000/xmlns/" | |
} | |
scopingElements = frozenset([ | |
(namespaces["html"], "applet"), | |
(namespaces["html"], "caption"), | |
(namespaces["html"], "html"), | |
(namespaces["html"], "marquee"), | |
(namespaces["html"], "object"), | |
(namespaces["html"], "table"), | |
(namespaces["html"], "td"), | |
(namespaces["html"], "th"), | |
(namespaces["mathml"], "mi"), | |
(namespaces["mathml"], "mo"), | |
(namespaces["mathml"], "mn"), | |
(namespaces["mathml"], "ms"), | |
(namespaces["mathml"], "mtext"), | |
(namespaces["mathml"], "annotation-xml"), | |
(namespaces["svg"], "foreignObject"), | |
(namespaces["svg"], "desc"), | |
(namespaces["svg"], "title"), | |
]) | |
formattingElements = frozenset([ | |
(namespaces["html"], "a"), | |
(namespaces["html"], "b"), | |
(namespaces["html"], "big"), | |
(namespaces["html"], "code"), | |
(namespaces["html"], "em"), | |
(namespaces["html"], "font"), | |
(namespaces["html"], "i"), | |
(namespaces["html"], "nobr"), | |
(namespaces["html"], "s"), | |
(namespaces["html"], "small"), | |
(namespaces["html"], "strike"), | |
(namespaces["html"], "strong"), | |
(namespaces["html"], "tt"), | |
(namespaces["html"], "u") | |
]) | |
specialElements = frozenset([ | |
(namespaces["html"], "address"), | |
(namespaces["html"], "applet"), | |
(namespaces["html"], "area"), | |
(namespaces["html"], "article"), | |
(namespaces["html"], "aside"), | |
(namespaces["html"], "base"), | |
(namespaces["html"], "basefont"), | |
(namespaces["html"], "bgsound"), | |
(namespaces["html"], "blockquote"), | |
(namespaces["html"], "body"), | |
(namespaces["html"], "br"), | |
(namespaces["html"], "button"), | |
(namespaces["html"], "caption"), | |
(namespaces["html"], "center"), | |
(namespaces["html"], "col"), | |
(namespaces["html"], "colgroup"), | |
(namespaces["html"], "command"), | |
(namespaces["html"], "dd"), | |
(namespaces["html"], "details"), | |
(namespaces["html"], "dir"), | |
(namespaces["html"], "div"), | |
(namespaces["html"], "dl"), | |
(namespaces["html"], "dt"), | |
(namespaces["html"], "embed"), | |
(namespaces["html"], "fieldset"), | |
(namespaces["html"], "figure"), | |
(namespaces["html"], "footer"), | |
(namespaces["html"], "form"), | |
(namespaces["html"], "frame"), | |
(namespaces["html"], "frameset"), | |
(namespaces["html"], "h1"), | |
(namespaces["html"], "h2"), | |
(namespaces["html"], "h3"), | |
(namespaces["html"], "h4"), | |
(namespaces["html"], "h5"), | |
(namespaces["html"], "h6"), | |
(namespaces["html"], "head"), | |
(namespaces["html"], "header"), | |
(namespaces["html"], "hr"), | |
(namespaces["html"], "html"), | |
(namespaces["html"], "iframe"), | |
# Note that image is commented out in the spec as "this isn't an | |
# element that can end up on the stack, so it doesn't matter," | |
(namespaces["html"], "image"), | |
(namespaces["html"], "img"), | |
(namespaces["html"], "input"), | |
(namespaces["html"], "isindex"), | |
(namespaces["html"], "li"), | |
(namespaces["html"], "link"), | |
(namespaces["html"], "listing"), | |
(namespaces["html"], "marquee"), | |
(namespaces["html"], "menu"), | |
(namespaces["html"], "meta"), | |
(namespaces["html"], "nav"), | |
(namespaces["html"], "noembed"), | |
(namespaces["html"], "noframes"), | |
(namespaces["html"], "noscript"), | |
(namespaces["html"], "object"), | |
(namespaces["html"], "ol"), | |
(namespaces["html"], "p"), | |
(namespaces["html"], "param"), | |
(namespaces["html"], "plaintext"), | |
(namespaces["html"], "pre"), | |
(namespaces["html"], "script"), | |
(namespaces["html"], "section"), | |
(namespaces["html"], "select"), | |
(namespaces["html"], "style"), | |
(namespaces["html"], "table"), | |
(namespaces["html"], "tbody"), | |
(namespaces["html"], "td"), | |
(namespaces["html"], "textarea"), | |
(namespaces["html"], "tfoot"), | |
(namespaces["html"], "th"), | |
(namespaces["html"], "thead"), | |
(namespaces["html"], "title"), | |
(namespaces["html"], "tr"), | |
(namespaces["html"], "ul"), | |
(namespaces["html"], "wbr"), | |
(namespaces["html"], "xmp"), | |
(namespaces["svg"], "foreignObject") | |
]) | |
htmlIntegrationPointElements = frozenset([ | |
(namespaces["mathml"], "annotaion-xml"), | |
(namespaces["svg"], "foreignObject"), | |
(namespaces["svg"], "desc"), | |
(namespaces["svg"], "title") | |
]) | |
mathmlTextIntegrationPointElements = frozenset([ | |
(namespaces["mathml"], "mi"), | |
(namespaces["mathml"], "mo"), | |
(namespaces["mathml"], "mn"), | |
(namespaces["mathml"], "ms"), | |
(namespaces["mathml"], "mtext") | |
]) | |
adjustSVGAttributes = { | |
"attributename": "attributeName", | |
"attributetype": "attributeType", | |
"basefrequency": "baseFrequency", | |
"baseprofile": "baseProfile", | |
"calcmode": "calcMode", | |
"clippathunits": "clipPathUnits", | |
"contentscripttype": "contentScriptType", | |
"contentstyletype": "contentStyleType", | |
"diffuseconstant": "diffuseConstant", | |
"edgemode": "edgeMode", | |
"externalresourcesrequired": "externalResourcesRequired", | |
"filterres": "filterRes", | |
"filterunits": "filterUnits", | |
"glyphref": "glyphRef", | |
"gradienttransform": "gradientTransform", | |
"gradientunits": "gradientUnits", | |
"kernelmatrix": "kernelMatrix", | |
"kernelunitlength": "kernelUnitLength", | |
"keypoints": "keyPoints", | |
"keysplines": "keySplines", | |
"keytimes": "keyTimes", | |
"lengthadjust": "lengthAdjust", | |
"limitingconeangle": "limitingConeAngle", | |
"markerheight": "markerHeight", | |
"markerunits": "markerUnits", | |
"markerwidth": "markerWidth", | |
"maskcontentunits": "maskContentUnits", | |
"maskunits": "maskUnits", | |
"numoctaves": "numOctaves", | |
"pathlength": "pathLength", | |
"patterncontentunits": "patternContentUnits", | |
"patterntransform": "patternTransform", | |
"patternunits": "patternUnits", | |
"pointsatx": "pointsAtX", | |
"pointsaty": "pointsAtY", | |
"pointsatz": "pointsAtZ", | |
"preservealpha": "preserveAlpha", | |
"preserveaspectratio": "preserveAspectRatio", | |
"primitiveunits": "primitiveUnits", | |
"refx": "refX", | |
"refy": "refY", | |
"repeatcount": "repeatCount", | |
"repeatdur": "repeatDur", | |
"requiredextensions": "requiredExtensions", | |
"requiredfeatures": "requiredFeatures", | |
"specularconstant": "specularConstant", | |
"specularexponent": "specularExponent", | |
"spreadmethod": "spreadMethod", | |
"startoffset": "startOffset", | |
"stddeviation": "stdDeviation", | |
"stitchtiles": "stitchTiles", | |
"surfacescale": "surfaceScale", | |
"systemlanguage": "systemLanguage", | |
"tablevalues": "tableValues", | |
"targetx": "targetX", | |
"targety": "targetY", | |
"textlength": "textLength", | |
"viewbox": "viewBox", | |
"viewtarget": "viewTarget", | |
"xchannelselector": "xChannelSelector", | |
"ychannelselector": "yChannelSelector", | |
"zoomandpan": "zoomAndPan" | |
} | |
adjustMathMLAttributes = {"definitionurl": "definitionURL"} | |
adjustForeignAttributes = { | |
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), | |
"xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), | |
"xlink:href": ("xlink", "href", namespaces["xlink"]), | |
"xlink:role": ("xlink", "role", namespaces["xlink"]), | |
"xlink:show": ("xlink", "show", namespaces["xlink"]), | |
"xlink:title": ("xlink", "title", namespaces["xlink"]), | |
"xlink:type": ("xlink", "type", namespaces["xlink"]), | |
"xml:base": ("xml", "base", namespaces["xml"]), | |
"xml:lang": ("xml", "lang", namespaces["xml"]), | |
"xml:space": ("xml", "space", namespaces["xml"]), | |
"xmlns": (None, "xmlns", namespaces["xmlns"]), | |
"xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) | |
} | |
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in | |
adjustForeignAttributes.items()]) | |
spaceCharacters = frozenset([ | |
"\t", | |
"\n", | |
"\u000C", | |
" ", | |
"\r" | |
]) | |
tableInsertModeElements = frozenset([ | |
"table", | |
"tbody", | |
"tfoot", | |
"thead", | |
"tr" | |
]) | |
asciiLowercase = frozenset(string.ascii_lowercase) | |
asciiUppercase = frozenset(string.ascii_uppercase) | |
asciiLetters = frozenset(string.ascii_letters) | |
digits = frozenset(string.digits) | |
hexDigits = frozenset(string.hexdigits) | |
asciiUpper2Lower = dict([(ord(c), ord(c.lower())) | |
for c in string.ascii_uppercase]) | |
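# Example of how this table is used (e.g. when the tokenizer lower-cases tag
# and DOCTYPE names): str.translate with an {ord: ord} mapping folds ASCII
# uppercase letters only, leaving everything else untouched.
assert "DOCType".translate(asciiUpper2Lower) == "doctype"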
# Heading elements need to be ordered | |
headingElements = ( | |
"h1", | |
"h2", | |
"h3", | |
"h4", | |
"h5", | |
"h6" | |
) | |
voidElements = frozenset([ | |
"base", | |
"command", | |
"event-source", | |
"link", | |
"meta", | |
"hr", | |
"br", | |
"img", | |
"embed", | |
"param", | |
"area", | |
"col", | |
"input", | |
"source", | |
"track" | |
]) | |
cdataElements = frozenset(['title', 'textarea']) | |
rcdataElements = frozenset([ | |
'style', | |
'script', | |
'xmp', | |
'iframe', | |
'noembed', | |
'noframes', | |
'noscript' | |
]) | |
booleanAttributes = { | |
"": frozenset(["irrelevant"]), | |
"style": frozenset(["scoped"]), | |
"img": frozenset(["ismap"]), | |
"audio": frozenset(["autoplay", "controls"]), | |
"video": frozenset(["autoplay", "controls"]), | |
"script": frozenset(["defer", "async"]), | |
"details": frozenset(["open"]), | |
"datagrid": frozenset(["multiple", "disabled"]), | |
"command": frozenset(["hidden", "disabled", "checked", "default"]), | |
"hr": frozenset(["noshade"]), | |
"menu": frozenset(["autosubmit"]), | |
"fieldset": frozenset(["disabled", "readonly"]), | |
"option": frozenset(["disabled", "readonly", "selected"]), | |
"optgroup": frozenset(["disabled", "readonly"]), | |
"button": frozenset(["disabled", "autofocus"]), | |
"input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]), | |
"select": frozenset(["disabled", "readonly", "autofocus", "multiple"]), | |
"output": frozenset(["disabled", "readonly"]), | |
} | |
# entitiesWindows1252 has to be _ordered_ and needs to have an index. It | |
# therefore can't be a frozenset. | |
entitiesWindows1252 = ( | |
8364, # 0x80 0x20AC EURO SIGN | |
65533, # 0x81 UNDEFINED | |
8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK | |
402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK | |
8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK | |
8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS | |
8224, # 0x86 0x2020 DAGGER | |
8225, # 0x87 0x2021 DOUBLE DAGGER | |
710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT | |
8240, # 0x89 0x2030 PER MILLE SIGN | |
352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON | |
8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK | |
338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE | |
65533, # 0x8D UNDEFINED | |
381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON | |
65533, # 0x8F UNDEFINED | |
65533, # 0x90 UNDEFINED | |
8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK | |
8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK | |
8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK | |
8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK | |
8226, # 0x95 0x2022 BULLET | |
8211, # 0x96 0x2013 EN DASH | |
8212, # 0x97 0x2014 EM DASH | |
732, # 0x98 0x02DC SMALL TILDE | |
8482, # 0x99 0x2122 TRADE MARK SIGN | |
353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON | |
8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK | |
339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE | |
65533, # 0x9D UNDEFINED | |
382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON | |
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS | |
) | |
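# Quick illustration (not part of the library) of how the ordered tuple above
# is indexed: a C1-range byte maps to its windows-1252 replacement codepoint
# via (byte - 0x80), as the per-entry comments indicate.
assert entitiesWindows1252[0x93 - 0x80] == 8220   # U+201C LEFT DOUBLE QUOTATION MARK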
xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) | |
entities = { | |
"AElig": "\xc6", | |
"AElig;": "\xc6", | |
"AMP": "&", | |
"AMP;": "&", | |
"Aacute": "\xc1", | |
"Aacute;": "\xc1", | |
"Abreve;": "\u0102", | |
"Acirc": "\xc2", | |
"Acirc;": "\xc2", | |
"Acy;": "\u0410", | |
"Afr;": "\U0001d504", | |
"Agrave": "\xc0", | |
"Agrave;": "\xc0", | |
"Alpha;": "\u0391", | |
"Amacr;": "\u0100", | |
"And;": "\u2a53", | |
"Aogon;": "\u0104", | |
"Aopf;": "\U0001d538", | |
"ApplyFunction;": "\u2061", | |
"Aring": "\xc5", | |
"Aring;": "\xc5", | |
"Ascr;": "\U0001d49c", | |
"Assign;": "\u2254", | |
"Atilde": "\xc3", | |
"Atilde;": "\xc3", | |
"Auml": "\xc4", | |
"Auml;": "\xc4", | |
"Backslash;": "\u2216", | |
"Barv;": "\u2ae7", | |
"Barwed;": "\u2306", | |
"Bcy;": "\u0411", | |
"Because;": "\u2235", | |
"Bernoullis;": "\u212c", | |
"Beta;": "\u0392", | |
"Bfr;": "\U0001d505", | |
"Bopf;": "\U0001d539", | |
"Breve;": "\u02d8", | |
"Bscr;": "\u212c", | |
"Bumpeq;": "\u224e", | |
"CHcy;": "\u0427", | |
"COPY": "\xa9", | |
"COPY;": "\xa9", | |
"Cacute;": "\u0106", | |
"Cap;": "\u22d2", | |
"CapitalDifferentialD;": "\u2145", | |
"Cayleys;": "\u212d", | |
"Ccaron;": "\u010c", | |
"Ccedil": "\xc7", | |
"Ccedil;": "\xc7", | |
"Ccirc;": "\u0108", | |
"Cconint;": "\u2230", | |
"Cdot;": "\u010a", | |
"Cedilla;": "\xb8", | |
"CenterDot;": "\xb7", | |
"Cfr;": "\u212d", | |
"Chi;": "\u03a7", | |
"CircleDot;": "\u2299", | |
"CircleMinus;": "\u2296", | |
"CirclePlus;": "\u2295", | |
"CircleTimes;": "\u2297", | |
"ClockwiseContourIntegral;": "\u2232", | |
"CloseCurlyDoubleQuote;": "\u201d", | |
"CloseCurlyQuote;": "\u2019", | |
"Colon;": "\u2237", | |
"Colone;": "\u2a74", | |
"Congruent;": "\u2261", | |
"Conint;": "\u222f", | |
"ContourIntegral;": "\u222e", | |
"Copf;": "\u2102", | |
"Coproduct;": "\u2210", | |
"CounterClockwiseContourIntegral;": "\u2233", | |
"Cross;": "\u2a2f", | |
"Cscr;": "\U0001d49e", | |
"Cup;": "\u22d3", | |
"CupCap;": "\u224d", | |
"DD;": "\u2145", | |
"DDotrahd;": "\u2911", | |
"DJcy;": "\u0402", | |
"DScy;": "\u0405", | |
"DZcy;": "\u040f", | |
"Dagger;": "\u2021", | |
"Darr;": "\u21a1", | |
"Dashv;": "\u2ae4", | |
"Dcaron;": "\u010e", | |
"Dcy;": "\u0414", | |
"Del;": "\u2207", | |
"Delta;": "\u0394", | |
"Dfr;": "\U0001d507", | |
"DiacriticalAcute;": "\xb4", | |
"DiacriticalDot;": "\u02d9", | |
"DiacriticalDoubleAcute;": "\u02dd", | |
"DiacriticalGrave;": "`", | |
"DiacriticalTilde;": "\u02dc", | |
"Diamond;": "\u22c4", | |
"DifferentialD;": "\u2146", | |
"Dopf;": "\U0001d53b", | |
"Dot;": "\xa8", | |
"DotDot;": "\u20dc", | |
"DotEqual;": "\u2250", | |
"DoubleContourIntegral;": "\u222f", | |
"DoubleDot;": "\xa8", | |
"DoubleDownArrow;": "\u21d3", | |
"DoubleLeftArrow;": "\u21d0", | |
"DoubleLeftRightArrow;": "\u21d4", | |
"DoubleLeftTee;": "\u2ae4", | |
"DoubleLongLeftArrow;": "\u27f8", | |
"DoubleLongLeftRightArrow;": "\u27fa", | |
"DoubleLongRightArrow;": "\u27f9", | |
"DoubleRightArrow;": "\u21d2", | |
"DoubleRightTee;": "\u22a8", | |
"DoubleUpArrow;": "\u21d1", | |
"DoubleUpDownArrow;": "\u21d5", | |
"DoubleVerticalBar;": "\u2225", | |
"DownArrow;": "\u2193", | |
"DownArrowBar;": "\u2913", | |
"DownArrowUpArrow;": "\u21f5", | |
"DownBreve;": "\u0311", | |
"DownLeftRightVector;": "\u2950", | |
"DownLeftTeeVector;": "\u295e", | |
"DownLeftVector;": "\u21bd", | |
"DownLeftVectorBar;": "\u2956", | |
"DownRightTeeVector;": "\u295f", | |
"DownRightVector;": "\u21c1", | |
"DownRightVectorBar;": "\u2957", | |
"DownTee;": "\u22a4", | |
"DownTeeArrow;": "\u21a7", | |
"Downarrow;": "\u21d3", | |
"Dscr;": "\U0001d49f", | |
"Dstrok;": "\u0110", | |
"ENG;": "\u014a", | |
"ETH": "\xd0", | |
"ETH;": "\xd0", | |
"Eacute": "\xc9", | |
"Eacute;": "\xc9", | |
"Ecaron;": "\u011a", | |
"Ecirc": "\xca", | |
"Ecirc;": "\xca", | |
"Ecy;": "\u042d", | |
"Edot;": "\u0116", | |
"Efr;": "\U0001d508", | |
"Egrave": "\xc8", | |
"Egrave;": "\xc8", | |
"Element;": "\u2208", | |
"Emacr;": "\u0112", | |
"EmptySmallSquare;": "\u25fb", | |
"EmptyVerySmallSquare;": "\u25ab", | |
"Eogon;": "\u0118", | |
"Eopf;": "\U0001d53c", | |
"Epsilon;": "\u0395", | |
"Equal;": "\u2a75", | |
"EqualTilde;": "\u2242", | |
"Equilibrium;": "\u21cc", | |
"Escr;": "\u2130", | |
"Esim;": "\u2a73", | |
"Eta;": "\u0397", | |
"Euml": "\xcb", | |
"Euml;": "\xcb", | |
"Exists;": "\u2203", | |
"ExponentialE;": "\u2147", | |
"Fcy;": "\u0424", | |
"Ffr;": "\U0001d509", | |
"FilledSmallSquare;": "\u25fc", | |
"FilledVerySmallSquare;": "\u25aa", | |
"Fopf;": "\U0001d53d", | |
"ForAll;": "\u2200", | |
"Fouriertrf;": "\u2131", | |
"Fscr;": "\u2131", | |
"GJcy;": "\u0403", | |
"GT": ">", | |
"GT;": ">", | |
"Gamma;": "\u0393", | |
"Gammad;": "\u03dc", | |
"Gbreve;": "\u011e", | |
"Gcedil;": "\u0122", | |
"Gcirc;": "\u011c", | |
"Gcy;": "\u0413", | |
"Gdot;": "\u0120", | |
"Gfr;": "\U0001d50a", | |
"Gg;": "\u22d9", | |
"Gopf;": "\U0001d53e", | |
"GreaterEqual;": "\u2265", | |
"GreaterEqualLess;": "\u22db", | |
"GreaterFullEqual;": "\u2267", | |
"GreaterGreater;": "\u2aa2", | |
"GreaterLess;": "\u2277", | |
"GreaterSlantEqual;": "\u2a7e", | |
"GreaterTilde;": "\u2273", | |
"Gscr;": "\U0001d4a2", | |
"Gt;": "\u226b", | |
"HARDcy;": "\u042a", | |
"Hacek;": "\u02c7", | |
"Hat;": "^", | |
"Hcirc;": "\u0124", | |
"Hfr;": "\u210c", | |
"HilbertSpace;": "\u210b", | |
"Hopf;": "\u210d", | |
"HorizontalLine;": "\u2500", | |
"Hscr;": "\u210b", | |
"Hstrok;": "\u0126", | |
"HumpDownHump;": "\u224e", | |
"HumpEqual;": "\u224f", | |
"IEcy;": "\u0415", | |
"IJlig;": "\u0132", | |
"IOcy;": "\u0401", | |
"Iacute": "\xcd", | |
"Iacute;": "\xcd", | |
"Icirc": "\xce", | |
"Icirc;": "\xce", | |
"Icy;": "\u0418", | |
"Idot;": "\u0130", | |
"Ifr;": "\u2111", | |
"Igrave": "\xcc", | |
"Igrave;": "\xcc", | |
"Im;": "\u2111", | |
"Imacr;": "\u012a", | |
"ImaginaryI;": "\u2148", | |
"Implies;": "\u21d2", | |
"Int;": "\u222c", | |
"Integral;": "\u222b", | |
"Intersection;": "\u22c2", | |
"InvisibleComma;": "\u2063", | |
"InvisibleTimes;": "\u2062", | |
"Iogon;": "\u012e", | |
"Iopf;": "\U0001d540", | |
"Iota;": "\u0399", | |
"Iscr;": "\u2110", | |
"Itilde;": "\u0128", | |
"Iukcy;": "\u0406", | |
"Iuml": "\xcf", | |
"Iuml;": "\xcf", | |
"Jcirc;": "\u0134", | |
"Jcy;": "\u0419", | |
"Jfr;": "\U0001d50d", | |
"Jopf;": "\U0001d541", | |
"Jscr;": "\U0001d4a5", | |
"Jsercy;": "\u0408", | |
"Jukcy;": "\u0404", | |
"KHcy;": "\u0425", | |
"KJcy;": "\u040c", | |
"Kappa;": "\u039a", | |
"Kcedil;": "\u0136", | |
"Kcy;": "\u041a", | |
"Kfr;": "\U0001d50e", | |
"Kopf;": "\U0001d542", | |
"Kscr;": "\U0001d4a6", | |
"LJcy;": "\u0409", | |
"LT": "<", | |
"LT;": "<", | |
"Lacute;": "\u0139", | |
"Lambda;": "\u039b", | |
"Lang;": "\u27ea", | |
"Laplacetrf;": "\u2112", | |
"Larr;": "\u219e", | |
"Lcaron;": "\u013d", | |
"Lcedil;": "\u013b", | |
"Lcy;": "\u041b", | |
"LeftAngleBracket;": "\u27e8", | |
"LeftArrow;": "\u2190", | |
"LeftArrowBar;": "\u21e4", | |
"LeftArrowRightArrow;": "\u21c6", | |
"LeftCeiling;": "\u2308", | |
"LeftDoubleBracket;": "\u27e6", | |
"LeftDownTeeVector;": "\u2961", | |
"LeftDownVector;": "\u21c3", | |
"LeftDownVectorBar;": "\u2959", | |
"LeftFloor;": "\u230a", | |
"LeftRightArrow;": "\u2194", | |
"LeftRightVector;": "\u294e", | |
"LeftTee;": "\u22a3", | |
"LeftTeeArrow;": "\u21a4", | |
"LeftTeeVector;": "\u295a", | |
"LeftTriangle;": "\u22b2", | |
"LeftTriangleBar;": "\u29cf", | |
"LeftTriangleEqual;": "\u22b4", | |
"LeftUpDownVector;": "\u2951", | |
"LeftUpTeeVector;": "\u2960", | |
"LeftUpVector;": "\u21bf", | |
"LeftUpVectorBar;": "\u2958", | |
"LeftVector;": "\u21bc", | |
"LeftVectorBar;": "\u2952", | |
"Leftarrow;": "\u21d0", | |
"Leftrightarrow;": "\u21d4", | |
"LessEqualGreater;": "\u22da", | |
"LessFullEqual;": "\u2266", | |
"LessGreater;": "\u2276", | |
"LessLess;": "\u2aa1", | |
"LessSlantEqual;": "\u2a7d", | |
"LessTilde;": "\u2272", | |
"Lfr;": "\U0001d50f", | |
"Ll;": "\u22d8", | |
"Lleftarrow;": "\u21da", | |
"Lmidot;": "\u013f", | |
"LongLeftArrow;": "\u27f5", | |
"LongLeftRightArrow;": "\u27f7", | |
"LongRightArrow;": "\u27f6", | |
"Longleftarrow;": "\u27f8", | |
"Longleftrightarrow;": "\u27fa", | |
"Longrightarrow;": "\u27f9", | |
"Lopf;": "\U0001d543", | |
"LowerLeftArrow;": "\u2199", | |
"LowerRightArrow;": "\u2198", | |
"Lscr;": "\u2112", | |
"Lsh;": "\u21b0", | |
"Lstrok;": "\u0141", | |
"Lt;": "\u226a", | |
"Map;": "\u2905", | |
"Mcy;": "\u041c", | |
"MediumSpace;": "\u205f", | |
"Mellintrf;": "\u2133", | |
"Mfr;": "\U0001d510", | |
"MinusPlus;": "\u2213", | |
"Mopf;": "\U0001d544", | |
"Mscr;": "\u2133", | |
"Mu;": "\u039c", | |
"NJcy;": "\u040a", | |
"Nacute;": "\u0143", | |
"Ncaron;": "\u0147", | |
"Ncedil;": "\u0145", | |
"Ncy;": "\u041d", | |
"NegativeMediumSpace;": "\u200b", | |
"NegativeThickSpace;": "\u200b", | |
"NegativeThinSpace;": "\u200b", | |
"NegativeVeryThinSpace;": "\u200b", | |
"NestedGreaterGreater;": "\u226b", | |
"NestedLessLess;": "\u226a", | |
"NewLine;": "\n", | |
"Nfr;": "\U0001d511", | |
"NoBreak;": "\u2060", | |
"NonBreakingSpace;": "\xa0", | |
"Nopf;": "\u2115", | |
"Not;": "\u2aec", | |
"NotCongruent;": "\u2262", | |
"NotCupCap;": "\u226d", | |
"NotDoubleVerticalBar;": |
# ... (remainder of the entities table is cut off here; GitHub's gist view cannot display files this large)
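The mapping above appears to be the HTML5 named character reference table (the same data that ships in Python's html.entities.html5 and in html5lib's constants module). As a minimal sketch of how such a table is consumed, the snippet below resolves named references with a plain per-match lookup; since the entities dict is truncated in this view, it uses the stdlib html.entities.html5 mapping as a stand-in, and it omits the spec's longest-prefix matching and numeric character references.

# Illustrative sketch (not part of the original file), assuming the dict above
# is equivalent to the stdlib's html.entities.html5 named-reference table.
import re
from html.entities import html5 as _html5_entities

_ENTITY_RE = re.compile(r"&([A-Za-z][A-Za-z0-9]*;?)")

def resolve_named_references(text):
    """Replace named references such as &Aacute; or &amp; with their text."""
    def _sub(match):
        name = match.group(1)          # e.g. "eacute;" or the legacy "amp"
        if name in _html5_entities:
            return _html5_entities[name]
        return match.group(0)          # unknown reference: leave it untouched
    return _ENTITY_RE.sub(_sub, text)

# Prints "Café & crème"; the stdlib html.unescape() does the same job in full,
# including numeric references and the spec's longest-match behaviour.
print(resolve_named_references("Caf&eacute; &amp; cr&egrave;me"))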