Identify units by hash

This commit is contained in:
Michal Čihař 2012-02-28 11:24:53 +01:00
parent 4a7586e9f2
commit 846189dbdc
3 changed files with 21 additions and 7 deletions

View File

@ -2,7 +2,7 @@ from django.db import models
from lang.models import Language
from util import is_plural, split_plural, join_plural
from util import is_plural, split_plural, join_plural, msg_checksum
class TranslationManager(models.Manager):
def update_from_blob(self, subproject, code, path, blob):
@ -23,16 +23,17 @@ class UnitManager(models.Manager):
'''
src = join_plural(unit.source.strings)
ctx = unit.getcontext()
checksum = msg_checksum(src, ctx)
import trans.models
try:
dbunit = self.get(
translation = translation,
source = src,
context = ctx)
checksum = checksum)
force = False
except:
dbunit = trans.models.Unit(
translation = translation,
checksum = checksum,
source = src,
context = ctx)
force = True

View File

@ -164,8 +164,8 @@ class SubProject(models.Model):
class Translation(models.Model):
subproject = models.ForeignKey(SubProject)
language = models.ForeignKey(Language)
translated = models.FloatField(default = 0)
fuzzy = models.FloatField(default = 0)
translated = models.FloatField(default = 0, db_index = True)
fuzzy = models.FloatField(default = 0, db_index = True)
revision = models.CharField(max_length = 40, default = '', blank = True)
filename = models.CharField(max_length = 200)
@ -221,13 +221,14 @@ class Translation(models.Model):
class Unit(models.Model):
translation = models.ForeignKey(Translation)
checksum = models.CharField(max_length = 40, default = '', blank = True, db_index = True)
location = models.TextField(default = '', blank = True)
context = models.TextField(default = '', blank = True)
flags = models.TextField(default = '', blank = True)
source = models.TextField()
target = models.TextField(default = '', blank = True)
fuzzy = models.BooleanField(default = False)
translated = models.BooleanField(default = False)
fuzzy = models.BooleanField(default = False, db_index = True)
translated = models.BooleanField(default = False, db_index = True)
objects = UnitManager()

View File

@ -1,3 +1,4 @@
import hashlib
# Delimiter (two NUL characters) that join_plural puts between the joined strings.
PLURAL_SEPARATOR = '\x00\x00'
@ -13,3 +14,14 @@ def split_plural(s):
def join_plural(s):
    '''
    Joins the strings in s into a single string, delimited
    by PLURAL_SEPARATOR.
    '''
    separator = PLURAL_SEPARATOR
    return separator.join(s)
def msg_checksum(source, context):
    '''
    Returns checksum of source string and context, used for quick lookup.

    Text (unicode) arguments are encoded as UTF-8 before hashing, byte
    strings are hashed as they are. The result is a hexadecimal MD5
    digest (32 characters).

    We use MD5 as it is faster than SHA1.

    Note: source and context are hashed back to back without any
    separator, so ('ab', 'c') and ('a', 'bc') yield the same checksum.
    '''
    m = hashlib.md5()
    for part in (source, context):
        # hashlib only works on bytes. Passing text directly fails (always
        # on Python 3, on non-ASCII characters on Python 2), so encode it
        # explicitly; for plain ASCII the digest stays the same.
        if not isinstance(part, (bytes, bytearray)):
            part = part.encode('utf-8')
        m.update(part)
    return m.hexdigest()