From d9c6fb6767c6873782847df168f8224d83ab30cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Fri, 14 Aug 2009 22:50:33 +0200 Subject: Rewrote eix-parser in Cython --> WAAAAAAAAAY faster --- portato/eix/__init__.py | 2 +- portato/eix/parser.py | 416 ----------------------------------------------- portato/eix/parser.pyx | 318 ++++++++++++++++++++++++++++++++++++ portato/eix/py_parser.py | 416 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 735 insertions(+), 417 deletions(-) delete mode 100644 portato/eix/parser.py create mode 100644 portato/eix/parser.pyx create mode 100644 portato/eix/py_parser.py (limited to 'portato/eix') diff --git a/portato/eix/__init__.py b/portato/eix/__init__.py index e726073..346fe82 100644 --- a/portato/eix/__init__.py +++ b/portato/eix/__init__.py @@ -69,7 +69,7 @@ class EixReader(object): self.file.seek(0) self.header = parser.header(self.file) - self.categories = parser.vector(self.file, parser.category, nelems = self.header.ncats()) + self.categories = parser.vector(self.file, parser.category, nelems = self.header.ncats) except: self.close() raise diff --git a/portato/eix/parser.py b/portato/eix/parser.py deleted file mode 100644 index cc42553..0000000 --- a/portato/eix/parser.py +++ /dev/null @@ -1,416 +0,0 @@ -# -*- coding: utf-8 -*- -# -# File: portato/eix/parser.py -# This file is part of the Portato-Project, a graphical portage-frontend. -# -# Copyright (C) 2006-2009 René 'Necoro' Neumann -# This is free software. You may redistribute copies of it under the terms of -# the GNU General Public License version 2. -# There is NO WARRANTY, to the extent permitted by law. -# -# Written by René 'Necoro' Neumann - -""" -The cache file supports different types of data. -In this module (nearly) all of these types have a corresponding function. - -For the exact way all the functions work, have a look at the eix format description. -""" - -from __future__ import absolute_import, with_statement -__docformat__ = "restructuredtext" - -import os -import struct -from functools import partial - -from ..helper import debug -from .exceptions import EndOfFileException - -# -# Helper -# - -def _get_bytes (file, length, expect_list = False): - """ - Return a number of bytes. - - :Parameters: - - file : file - The file to read from. - - length : int - The number of bytes to read. - - expect_list : bool - In case ``length`` is 1, only a single byte is returned. If ``expect_list`` is true, then a list is also returned in this case. - - :rtype: int or int[] - :raises EndOfFileException: if EOF is reached during execution - """ - - s = file.read(length) - - if len(s) != length: - raise EndOfFileException, file.name - - if length == 1 and not expect_list: - return ord(s) # is faster than unpack and we have a scalar - else: - return struct.unpack("%sB" % length, s) - -# -# Base Types -# - -def number (file, skip = False): - """ - Returns a number. - - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. - - :rtype: int - """ - - n = _get_bytes(file, 1) - - if n < 0xFF: - value = n - else: - count = 0 - - while (n == 0xFF): - count += 1 - n = _get_bytes(file, 1) - - if n == 0: - n = 0xFF # 0xFF is encoded as 0xFF 0x00 - count -= 1 - - value = n << (count*8) - - if count > 0: - - if skip: - file.seek(count, os.SEEK_CUR) - return - - else: - rest = _get_bytes(file, count, expect_list = True) - - for i, r in enumerate(rest): - value += r << ((count - i - 1)*8) - - return value - -def vector (file, get_type, skip = False, nelems = None): - """ - Returns a vector of elements. - - :Parameters: - - file : file - The file to read from. - - get_type : function(file, bool) - The function determining type of the elements. - - skip : bool - Do not return the actual value, but just skip to the next datum. - - nelems : int - Normally the eix-Vector has the number of elements as the first argument. - If for some reason this is not the case, you can pass it in here. - - :rtype: list - """ - - if nelems is None: - nelems = number(file) - - if skip: - for i in range(nelems): - get_type(file, skip = True) - else: - return [get_type(file) for i in range(nelems)] - -def typed_vector(type, nelems = None): - """ - Shortcut to create a function for a special type of vector. - - :Parameters: - - type : function(file, bool) - The function determining type of the elements. - - nelems : int - Normally the eix-Vector has the number of elements as the first argument. - If for some reason this is not the case, you can pass it in here. - Do not return the actual value, but just skip to the next datum. - - :rtype: function(file, bool) - :see: `vector` - """ - - if nelems is None: - return partial(vector, get_type = type) - else: - return partial(vector, get_type = type, nelems = nelems) - -def string (file, skip = False): - """ - Returns a string. - - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. - - :rtype: str - """ - nelems = number(file) - - if skip: - file.seek(nelems, os.SEEK_CUR) - return - else: - s = file.read(nelems) - - if len(s) != nelems: - raise EndOfFileException, file.name - - return s - -# -# Complex Types -# - -class LazyElement (object): - """ - This class models a value in the cache, which is only read on access. - - If not accessed directly, only the position inside the file is stored. - """ - __slots__ = ("file", "get_type", "_value", "pos") - - def __init__ (self, get_type, file): - """ - :Parameters: - - get_type : function(file, bool) - The function determining type of the elements. - - file : file - The file to read from. - """ - - self.file = file - self.get_type = get_type - self._value = None - - self.pos = file.tell() - get_type(file, skip=True) # skip it for the moment - - @property - def value (self): - """ - The value of the element. - """ - - if self._value is None: - old_pos = self.file.tell() - self.file.seek(self.pos, os.SEEK_SET) - self._value = self.get_type(self.file, skip = False) - self.file.seek(old_pos, os.SEEK_SET) - - return self._value - - def __call__ (self): - """ - Convenience function. Also returns the value. - """ - return self.value - -class overlay (object): - """ - Represents an overlay object. - - :IVariables: - - path : `LazyElement` - The path to the overlay - - label : `LazyElement` - The label/name of the overlay - """ - __slots__ = ("path", "label") - - def __init__ (self, file, skip = False): - """ - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. - """ - - self.path = LazyElement(string, file) - self.label = LazyElement(string, file) - -class header (object): - """ - Represents the header of the cache. - - :IVariables: - - version : `LazyElement` - The version of the cache file. - - ncats : `LazyElement` - The number of categories. - - overlays : `LazyElement` <`overlay` []> - The list of overlays. - - provide : `LazyElement` - A list of "PROVIDE" values. - - licenses : `LazyElement` - The list of licenses. - - keywords : `LazyElement` - The list of keywords. - - useflags : `LazyElement` - The list of useflags. - - slots : `LazyElement` - The list of slots different from "0". - - sets : `LazyElement` - The names of world sets are the names (without leading @) of the world sets stored in /var/lib/portage/world_sets. - If SAVE_WORLD=false, the list is empty. - """ - __slots__ = ("version", "ncats", "overlays", "provide", - "licenses", "keywords", "useflags", "slots", "sets") - - def __init__ (self, file, skip = False): - """ - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. - """ - def LE (t): - return LazyElement(t, file) - - self.version = LE(number) - self.ncats = LE(number) - self.overlays = LE(typed_vector(overlay)) - self.provide = LE(typed_vector(string)) - self.licenses = LE(typed_vector(string)) - self.keywords = LE(typed_vector(string)) - self.useflags = LE(typed_vector(string)) - self.slots = LE(typed_vector(string)) - self.sets = LE(typed_vector(string)) - -class package (object): - """ - The representation of one package. - - Currently, version information is not parsed and stored. - So you can gain general infos only. - - :IVariables: - - name : `LazyElement` - The name of the package. - - description : `LazyElement` - Description of the package. - - homepage : `LazyElement` - The homepage of the package. - - provide : `LazyElement` - The indices of `header.provide` representing the PROVIDE value of the package. - - license : `LazyElement` - The index of `header.licenses` representing the license of the package. - - useflags : `LazyElement` - The indices of `header.useflags` representing the IUSE value of the package. - """ - - __slots__ = ("_offset", "name", "description", "provide", - "homepage", "license", "useflags") - - def __init__ (self, file, skip = False): - """ - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. - """ - def LE (t): - return LazyElement(t, file) - - self._offset = number(file) - - after_offset = file.tell() - - self.name = LE(string) - self.description = LE(string) - self.provide = LE(typed_vector(number)) - self.homepage = LE(string) - self.license = LE(number) - self.useflags = LE(typed_vector(number)) - - # self.versions = LE(typed_vector(version)) - # for the moment just skip the versions - file.seek(self._offset - (file.tell() - after_offset), os.SEEK_CUR) - -class category (object): - """ - Represents a whole category. - - :IVariables: - - name : `LazyElement` - The category name. - - packages : `LazyElement` <`package` []> - All the packages of the category. - """ - __slots__ = ("name", "packages") - - def __init__ (self, file, skip = False): - """ - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. - """ - self.name = LazyElement(string, file) - self.packages = LazyElement(typed_vector(package), file) diff --git a/portato/eix/parser.pyx b/portato/eix/parser.pyx new file mode 100644 index 0000000..453376e --- /dev/null +++ b/portato/eix/parser.pyx @@ -0,0 +1,318 @@ +# -*- coding: utf-8 -*- +# +# File: portato/eix/_parser.pyx +# This file is part of the Portato-Project, a graphical portage-frontend. +# +# Copyright (C) 2006-2009 René 'Necoro' Neumann +# This is free software. You may redistribute copies of it under the terms of +# the GNU General Public License version 2. +# There is NO WARRANTY, to the extent permitted by law. +# +# Written by René 'Necoro' Neumann + +""" +The cache file supports different types of data. +In this module (nearly) all of these types have a corresponding function. + +For the exact way all the functions work, have a look at the eix format description. +""" + +__docformat__ = "restructuredtext" + +cdef extern from "stdio.h": + ctypedef struct FILE: + pass + + int fgetc(FILE* stream) + + int EOF + int SEEK_CUR + +cdef extern from "Python.h": + FILE* PyFile_AsFile(object) + + ctypedef int Py + +ctypedef unsigned char UChar +ctypedef long long LLong + +from portato.eix.exceptions import EndOfFileException + +# +# Helper +# + +cdef int _get_byte (FILE* file) except -1: + cdef int c = fgetc(file) + + if c == EOF: + raise EndOfFileException + + return c + + +# +# Base Types +# + +cdef LLong _number (object pfile): + cdef UChar n + cdef LLong value + cdef int i + + cdef unsigned short count = 1 + cdef FILE* file = PyFile_AsFile(pfile) + + n = _get_byte(file) + + if n < 0xFF: + return n + else: + + n = _get_byte(file) + while (n == 0xFF): + count += 1 + n = _get_byte(file) + + if n == 0: + value = 0xFF # 0xFF is encoded as 0xFF 0x00 + count -= 1 + else: + value = n + + for i in range(count): + value = (value << 8) | (_get_byte(file)) + + return value + +def number (file): + """ + Returns a number. + + :param file: The file to read from + :type file: file + :rtype: int + """ + + return _number(file) + +def vector (file, get_type, nelems = None): + """ + Returns a vector of elements. + + :Parameters: + + file : file + The file to read from. + + get_type : function(file, bool) + The function determining type of the elements. + + nelems : int + Normally the eix-Vector has the number of elements as the first argument. + If for some reason this is not the case, you can pass it in here. + + :rtype: list + """ + + cdef LLong n + cdef LLong i + + if nelems is None: + n = _number(file) + else: + n = nelems + + l = [] + for i in range(n): + l.append(get_type(file)) + + return l + +def string (file): + """ + Returns a string. + + :param file: The file to read from + :type file: file + :rtype: str + """ + cdef LLong nelems + + nelems = _number(file) + + s = file.read(nelems) + + if len(s) != nelems: + raise EndOfFileException, file.name + + return s + +# +# Complex Types +# + +cdef class overlay: + """ + Represents an overlay object. + + :IVariables: + + path : string + The path to the overlay + + label : string + The label/name of the overlay + """ + + cdef readonly object path + cdef readonly object label + + def __init__ (self, file): + """ + :param file: The file to read from + :type file: file + """ + + self.path = string(file) + self.label = string(file) + +cdef class header: + """ + Represents the header of the cache. + + :IVariables: + + version : int + The version of the cache file. + + ncats : int + The number of categories. + + overlays : `overlay` [] + The list of overlays. + + provide : string[] + A list of "PROVIDE" values. + + licenses : string[] + The list of licenses. + + keywords : string[] + The list of keywords. + + useflags : string[] + The list of useflags. + + slots : string[] + The list of slots different from "0". + + sets : string[] + The names of world sets are the names (without leading @) of the world sets stored in /var/lib/portage/world_sets. + If SAVE_WORLD=false, the list is empty. + """ + + cdef readonly object version + cdef readonly object ncats + cdef readonly object overlays + cdef readonly object provide + cdef readonly object licenses + cdef readonly object keywords + cdef readonly object useflags + cdef readonly object slots + cdef readonly object sets + + def __init__ (self, file): + """ + :param file: The file to read from + :type file: file + """ + self.version = number(file) + self.ncats = number(file) + self.overlays = vector(file, overlay) + self.provide = vector(file, string) + self.licenses = vector(file, string) + self.keywords = vector(file, string) + self.useflags = vector(file, string) + self.slots = vector(file, string) + self.sets = vector(file, string) + +cdef class package: + """ + The representation of one package. + + Currently, version information is not parsed and stored. + So you can gain general infos only. + + :IVariables: + + name : string + The name of the package. + + description : string + Description of the package. + + homepage : string + The homepage of the package. + + provide : int[] + The indices of `header.provide` representing the PROVIDE value of the package. + + license : int + The index of `header.licenses` representing the license of the package. + + useflags : int[] + The indices of `header.useflags` representing the IUSE value of the package. + """ + + cdef readonly object _offset + cdef readonly object name + cdef readonly object description + cdef readonly object provide + cdef readonly object homepage + cdef readonly object license + cdef readonly object useflags + + def __init__ (self, file): + """ + :param file: The file to read from + :type file: file + """ + self._offset = number(file) + + after_offset = file.tell() + + self.name = string(file) + self.description = string(file) + self.provide = vector(file, number) + self.homepage = string(file) + self.license = number(file) + self.useflags = vector(file, number) + + # self.versions = LE(typed_vector(version)) + # for the moment just skip the versions + file.seek(self._offset - (file.tell() - after_offset), SEEK_CUR) + +cdef class category: + """ + Represents a whole category. + + :IVariables: + + name : string + The category name. + + packages : `package` [] + All the packages of the category. + """ + + cdef readonly object name + cdef readonly object packages + + def __init__ (self, file): + """ + :param file: The file to read from + :type file: file + """ + self.name = string(file) + self.packages = vector(file, package) diff --git a/portato/eix/py_parser.py b/portato/eix/py_parser.py new file mode 100644 index 0000000..cc42553 --- /dev/null +++ b/portato/eix/py_parser.py @@ -0,0 +1,416 @@ +# -*- coding: utf-8 -*- +# +# File: portato/eix/parser.py +# This file is part of the Portato-Project, a graphical portage-frontend. +# +# Copyright (C) 2006-2009 René 'Necoro' Neumann +# This is free software. You may redistribute copies of it under the terms of +# the GNU General Public License version 2. +# There is NO WARRANTY, to the extent permitted by law. +# +# Written by René 'Necoro' Neumann + +""" +The cache file supports different types of data. +In this module (nearly) all of these types have a corresponding function. + +For the exact way all the functions work, have a look at the eix format description. +""" + +from __future__ import absolute_import, with_statement +__docformat__ = "restructuredtext" + +import os +import struct +from functools import partial + +from ..helper import debug +from .exceptions import EndOfFileException + +# +# Helper +# + +def _get_bytes (file, length, expect_list = False): + """ + Return a number of bytes. + + :Parameters: + + file : file + The file to read from. + + length : int + The number of bytes to read. + + expect_list : bool + In case ``length`` is 1, only a single byte is returned. If ``expect_list`` is true, then a list is also returned in this case. + + :rtype: int or int[] + :raises EndOfFileException: if EOF is reached during execution + """ + + s = file.read(length) + + if len(s) != length: + raise EndOfFileException, file.name + + if length == 1 and not expect_list: + return ord(s) # is faster than unpack and we have a scalar + else: + return struct.unpack("%sB" % length, s) + +# +# Base Types +# + +def number (file, skip = False): + """ + Returns a number. + + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. + + :rtype: int + """ + + n = _get_bytes(file, 1) + + if n < 0xFF: + value = n + else: + count = 0 + + while (n == 0xFF): + count += 1 + n = _get_bytes(file, 1) + + if n == 0: + n = 0xFF # 0xFF is encoded as 0xFF 0x00 + count -= 1 + + value = n << (count*8) + + if count > 0: + + if skip: + file.seek(count, os.SEEK_CUR) + return + + else: + rest = _get_bytes(file, count, expect_list = True) + + for i, r in enumerate(rest): + value += r << ((count - i - 1)*8) + + return value + +def vector (file, get_type, skip = False, nelems = None): + """ + Returns a vector of elements. + + :Parameters: + + file : file + The file to read from. + + get_type : function(file, bool) + The function determining type of the elements. + + skip : bool + Do not return the actual value, but just skip to the next datum. + + nelems : int + Normally the eix-Vector has the number of elements as the first argument. + If for some reason this is not the case, you can pass it in here. + + :rtype: list + """ + + if nelems is None: + nelems = number(file) + + if skip: + for i in range(nelems): + get_type(file, skip = True) + else: + return [get_type(file) for i in range(nelems)] + +def typed_vector(type, nelems = None): + """ + Shortcut to create a function for a special type of vector. + + :Parameters: + + type : function(file, bool) + The function determining type of the elements. + + nelems : int + Normally the eix-Vector has the number of elements as the first argument. + If for some reason this is not the case, you can pass it in here. + Do not return the actual value, but just skip to the next datum. + + :rtype: function(file, bool) + :see: `vector` + """ + + if nelems is None: + return partial(vector, get_type = type) + else: + return partial(vector, get_type = type, nelems = nelems) + +def string (file, skip = False): + """ + Returns a string. + + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. + + :rtype: str + """ + nelems = number(file) + + if skip: + file.seek(nelems, os.SEEK_CUR) + return + else: + s = file.read(nelems) + + if len(s) != nelems: + raise EndOfFileException, file.name + + return s + +# +# Complex Types +# + +class LazyElement (object): + """ + This class models a value in the cache, which is only read on access. + + If not accessed directly, only the position inside the file is stored. + """ + __slots__ = ("file", "get_type", "_value", "pos") + + def __init__ (self, get_type, file): + """ + :Parameters: + + get_type : function(file, bool) + The function determining type of the elements. + + file : file + The file to read from. + """ + + self.file = file + self.get_type = get_type + self._value = None + + self.pos = file.tell() + get_type(file, skip=True) # skip it for the moment + + @property + def value (self): + """ + The value of the element. + """ + + if self._value is None: + old_pos = self.file.tell() + self.file.seek(self.pos, os.SEEK_SET) + self._value = self.get_type(self.file, skip = False) + self.file.seek(old_pos, os.SEEK_SET) + + return self._value + + def __call__ (self): + """ + Convenience function. Also returns the value. + """ + return self.value + +class overlay (object): + """ + Represents an overlay object. + + :IVariables: + + path : `LazyElement` + The path to the overlay + + label : `LazyElement` + The label/name of the overlay + """ + __slots__ = ("path", "label") + + def __init__ (self, file, skip = False): + """ + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. + """ + + self.path = LazyElement(string, file) + self.label = LazyElement(string, file) + +class header (object): + """ + Represents the header of the cache. + + :IVariables: + + version : `LazyElement` + The version of the cache file. + + ncats : `LazyElement` + The number of categories. + + overlays : `LazyElement` <`overlay` []> + The list of overlays. + + provide : `LazyElement` + A list of "PROVIDE" values. + + licenses : `LazyElement` + The list of licenses. + + keywords : `LazyElement` + The list of keywords. + + useflags : `LazyElement` + The list of useflags. + + slots : `LazyElement` + The list of slots different from "0". + + sets : `LazyElement` + The names of world sets are the names (without leading @) of the world sets stored in /var/lib/portage/world_sets. + If SAVE_WORLD=false, the list is empty. + """ + __slots__ = ("version", "ncats", "overlays", "provide", + "licenses", "keywords", "useflags", "slots", "sets") + + def __init__ (self, file, skip = False): + """ + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. + """ + def LE (t): + return LazyElement(t, file) + + self.version = LE(number) + self.ncats = LE(number) + self.overlays = LE(typed_vector(overlay)) + self.provide = LE(typed_vector(string)) + self.licenses = LE(typed_vector(string)) + self.keywords = LE(typed_vector(string)) + self.useflags = LE(typed_vector(string)) + self.slots = LE(typed_vector(string)) + self.sets = LE(typed_vector(string)) + +class package (object): + """ + The representation of one package. + + Currently, version information is not parsed and stored. + So you can gain general infos only. + + :IVariables: + + name : `LazyElement` + The name of the package. + + description : `LazyElement` + Description of the package. + + homepage : `LazyElement` + The homepage of the package. + + provide : `LazyElement` + The indices of `header.provide` representing the PROVIDE value of the package. + + license : `LazyElement` + The index of `header.licenses` representing the license of the package. + + useflags : `LazyElement` + The indices of `header.useflags` representing the IUSE value of the package. + """ + + __slots__ = ("_offset", "name", "description", "provide", + "homepage", "license", "useflags") + + def __init__ (self, file, skip = False): + """ + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. + """ + def LE (t): + return LazyElement(t, file) + + self._offset = number(file) + + after_offset = file.tell() + + self.name = LE(string) + self.description = LE(string) + self.provide = LE(typed_vector(number)) + self.homepage = LE(string) + self.license = LE(number) + self.useflags = LE(typed_vector(number)) + + # self.versions = LE(typed_vector(version)) + # for the moment just skip the versions + file.seek(self._offset - (file.tell() - after_offset), os.SEEK_CUR) + +class category (object): + """ + Represents a whole category. + + :IVariables: + + name : `LazyElement` + The category name. + + packages : `LazyElement` <`package` []> + All the packages of the category. + """ + __slots__ = ("name", "packages") + + def __init__ (self, file, skip = False): + """ + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. + """ + self.name = LazyElement(string, file) + self.packages = LazyElement(typed_vector(package), file) -- cgit v1.2.3