fengdh / mdict-js Goto Github PK
View Code? Open in Web Editor NEW — A pure JavaScript implementation for parsing MDict files (mdx/mdd) locally.
A pure Javascript implementation for parsing MDict file (mdx/mdd) in local.
I noticed that the current version does not support Chinese search, because of the way characters are compared (in my testing environment).
3 methods refined
reduce()
seekVanguard()
shrink()
1 method added
hasChinese()
Detail
/**
 * Tells whether the given text contains at least one character outside the
 * Latin-1 range (any UTF-16 code unit above 0xFF).
 *
 * Despite the name, the check is not limited to Chinese: every character
 * beyond U+00FF matches.
 *
 * @param {string} Text - Text to inspect.
 * @returns {boolean} true when a character above U+00FF is present.
 */
function hasChinese(Text) {
  return /[^\x00-\xff]/.test(Text);
}
/**
 * Binary search over a list of key-index blocks: repeatedly halves the
 * candidate range by comparing the phrase with the adapted `last_word` of the
 * last block in the lower half, and returns the single block that remains.
 *
 * Comparison rules:
 *  - phrase without non-Latin-1 characters: plain `>` string comparison;
 *  - phrase and block key both with such characters: `localeCompare(..., 'zh')`;
 *  - only the phrase has such characters: the phrase is treated as greater.
 *
 * @param {Array<{last_word: string}>} arr - Key-index blocks.
 * @param {string} phrase - Phrase to locate (callers pass it already adapted).
 * @returns {*} The remaining block, or `arr[0]` (undefined for an empty list).
 */
function reduce(arr, phrase) {
  let lo = 0;
  let hi = arr.length;

  // Invariant: the answer lies in arr[lo .. hi).
  while (hi - lo > 1) {
    const half = (hi - lo) >> 1;
    const pivotKey = _adaptKey(arr[lo + half - 1].last_word);

    let goRight;
    if (!hasChinese(phrase)) {
      goRight = phrase > pivotKey;
    } else if (hasChinese(pivotKey)) {
      goRight = phrase.localeCompare(pivotKey, 'zh') > 0;
    } else {
      goRight = true;
    }

    if (goRight) {
      lo += half;
    } else {
      hi = lo + half;
    }
  }

  return arr[lo];
}
/**
 * Locates the key block and the position inside it at which keywords matching
 * the phrase begin.
 *
 * Steps:
 *  1. adapt the phrase with `_adaptKey` and pick a block via `reduce(KEY_INDEX, phrase)`;
 *  2. if the phrase does not sort after that block's adapted `last_word`, step
 *     back over preceding blocks whose adapted `last_word` compares equal;
 *  3. load the block's keys, take the position given by `shrink`, and step back
 *     while the previous key (adapted) equals the phrase.
 *
 * @param {string} phrase - Phrase to look up.
 * @returns {Promise<Array>} Resolves to `[kdx, idx, list]`, where `idx` is
 *   clamped to at most `list.length - 1`.
 */
function seekVanguard(phrase) {
  phrase = _adaptKey(phrase);
  let kdx = reduce(KEY_INDEX, phrase);

  // Decide whether the phrase may sit at or before this block's last word.
  const lastKey = _adaptKey(kdx.last_word);
  let notAfterLast;
  if (!hasChinese(phrase)) {
    notAfterLast = phrase <= lastKey;
  } else if (hasChinese(lastKey)) {
    notAfterLast = phrase.localeCompare(lastKey, 'zh') <= 0;
  } else {
    notAfterLast = false;
  }

  if (notAfterLast) {
    // Look back for the first record block sharing the same last word.
    let cursor = kdx.index - 1;
    let prev = KEY_INDEX[cursor];
    while (prev &&
        _adaptKey(prev.last_word).localeCompare(_adaptKey(kdx.last_word), 'zh') === 0) {
      kdx = prev;
      cursor -= 1;
      prev = KEY_INDEX[cursor];
    }
  }

  return loadKeys(kdx).then(function (list) {
    let idx = shrink(list, phrase);
    // Look back for the first matched keyword position.
    while (idx > 0 && _adaptKey(list[idx - 1]) === _adaptKey(phrase)) {
      idx -= 1;
    }
    return [kdx, Math.min(idx, list.length - 1), list];
  });
}
/**
 * Finds the position in a key list that corresponds to the phrase.
 *
 * Phrase with characters above U+00FF (per `hasChinese`): a linear scan over
 * the raw keys, preferring in order
 *   1. the first key equal to the phrase,
 *   2. the first key starting with the phrase,
 *   3. the first key starting with the phrase's first UTF-16 unit,
 *   4. position 0.
 *
 * Any other phrase: a binary search comparing the phrase with `_adaptKey` of
 * the keys. The result is offset by `arr.pos` when the list carries one, and
 * is one past the last inspected key when the phrase sorts after it.
 *
 * @param {string[]} arr - Key list of one block.
 * @param {string} phrase - Phrase to locate (callers pass it already adapted).
 * @returns {number} Index into the list.
 */
function shrink(arr, phrase) {
  if (hasChinese(phrase)) {
    let firstExact = -1;
    let firstPrefixed = -1;
    arr.forEach(function (word, i) {
      if (!word.startsWith(phrase)) {
        return;
      }
      if (firstPrefixed < 0) {
        firstPrefixed = i;
      }
      if (firstExact < 0 && word === phrase) {
        firstExact = i;
      }
    });
    if (firstExact >= 0) {
      return firstExact;
    }
    if (firstPrefixed >= 0) {
      return firstPrefixed;
    }

    // Fall back to the first key sharing the phrase's leading character.
    const head = phrase[0];
    let firstSameHead = 0;
    arr.some(function (word, i) {
      if (word.startsWith(head)) {
        firstSameHead = i;
        return true;
      }
      return false;
    });
    return firstSameHead;
  }

  // Binary search over arr[lo .. lo + count); `offset` mirrors the reported position.
  let lo = 0;
  let count = arr.length;
  let offset = arr.pos || 0;
  while (count > 1) {
    const half = count >> 1;
    if (phrase < _adaptKey(arr[lo + half])) {
      count = half;
    } else {
      lo += half;
      offset = offset + half;
      count -= half;
    }
  }
  return offset + (phrase <= _adaptKey(arr[lo]) ? 0 : 1);
}
I added some code into mdict-parser.js
and posted a pull request months ago. Below is a better method.
Usage
replace this line: return harvest(list.map(findWord));
with: return harvest(list.map(findWord)).map(replaceStyleMark);
/**
 * Replaces style marks (`1`, `2`, ... written between backticks) in an entry
 * with the matching style taken from the dictionary's `StyleSheet` attribute.
 *
 * Worked out against: https://www.douban.com/note/526161004/ (French-Chinese /
 * Chinese-French dictionary), Niubility Dictionary Ultimate v1.0.
 *
 * How it works:
 *  - the stylesheet text is split on " N " separators; the i-th non-blank
 *    piece becomes the style for mark `i+1`;
 *  - the content is cut in front of every mark, so each piece starts with the
 *    mark that applies to it;
 *  - if a style contains exactly one "empty" element (`<tag> </tag>`), the
 *    piece's text is placed inside that element; otherwise the mark itself is
 *    replaced by the style text;
 *  - marks that have no style are turned into `<br>`.
 *
 * Differences from the previous version:
 *  - content with marks is no longer dropped (returned as "") when the
 *    dictionary has no StyleSheet; the marks simply become `<br>`;
 *  - entry/style text is inserted literally, so `$$`, `$&`, `$1` ... inside
 *    dictionary content are not interpreted as replacement patterns;
 *  - no jQuery dependency.
 *
 * Reads the free variable `attrs` (dictionary header attributes) from the
 * enclosing scope.
 *
 * @param {string} content_string - Raw entry content.
 * @returns {string} Content with style marks expanded.
 */
function replaceStyleMark(content_string) {
  function isNotBlank(text) {
    return text.replace(/\s/g, '') !== '';
  }

  // Nothing to do when the content carries no style marks.
  if (content_string.match(/`\d+`/g) === null) {
    return content_string;
  }

  // Without a stylesheet there is nothing to expand: keep the text and only
  // neutralise the marks.
  const styles_string = attrs['StyleSheet'];
  if (styles_string == null) {
    return content_string.replace(/`\d+`/g, '<br>');
  }

  // Style i (0-based) belongs to mark `i+1`.
  const style_strings = (' ' + styles_string.replace(/^\s*/, ''))
    .split(/ \d+ /)
    .filter(isNotBlank);

  // Cut the content in front of every mark.
  const lines = content_string
    .replace(/`\d+`/g, '<break>$&')
    .split('<break>')
    .filter(isNotBlank);

  let content_with_style = '';
  lines.forEach(function (line) {
    style_strings.forEach(function (style, index) {
      const key = '`' + (index + 1) + '`';
      if (!line.includes(key)) {
        return;
      }
      const style_templates = style.match(/<[^>]*>\s+<\/[^>]*>/g);
      if (style_templates !== null && style_templates.length === 1) {
        const template = style_templates[0];
        const line_content = line.split(key).join('');
        // Function replacers keep `$` sequences in the text literal.
        const filled = template.replace(/>\s+</g, function () {
          return '>' + line_content + '<';
        });
        line = style.replace(template, function () {
          return filled;
        });
      } else {
        line = line.split(key).join(style);
      }
    });
    content_with_style += line;
  });

  // Replace marks that had no matching style with <br>.
  return content_with_style.replace(/`\d+`/g, '<br>');
}
I want to use your library with my project..
But your project is made by requirejs and mine is es6....
It's up to you, but how about converting this project to ES6?
Continue from my blog's comment
Nice to read your introduction about my mdict-js project on GitHub. I have a problem with "loading/discovering a local dictionary file without interference from the user once it has been configured". Do you have any idea? I suppose parsing mdd/mdx through XmlHTTPRequest is quite dumb, even for local resources.
For a better reading experience with foreign language, I used to make a prototype like this:
http://www.hi-pda.com/forum/vi...
But the public online dictionary stopped to service, that's why I wrote mdict-js. Wish to share idea with you.(Sorry, I know no Vietnamese. Thanks for Google translating service.)
I'm having trouble accessing Disqus from my country so I will continue the discussion here if it's OK with you.
As you probably know from my blog, I'm planning on building a truly cross-platform application for MDict using React and React Native. I have plans for Android, Linux and Windows (but it's easy to port to the rest thanks to React).
Now get to the point :
XmlHTTPRequest is quite dumb
AFAIK, for security reasons, there is no other way you can do this.
I looked at your project and it's a mess :( . I can refactor them to use npm as a dependencies manager and use Webpack as a build tool. This way, your MDict parser can be re-used elsewhere and decoupled from the website
Words with capital letters, such as 'England', are not supported.
Hello. Thank you so much for the project
I'm not a developer, but I run this site of meditation- and Buddhism-related texts: https://find.dhamma.gift/
And your project could be a perfect part of it but I'm almost 0 in js so I couldn't change the input from user file to input from local file on the server
I tried some nonsense like var
But of course it doesn't work.
Could you please give me some hint how to do that. Or may be you already have this option somewhere else in other project?
I was looking for any library or sources that would help me read MDX dictionary files in Java. I have already found some resources, such as mdict-android-opensource, but it does not include an implementation of how to read mdx database files. Does anyone know a good source about these files and possible libraries that could be used for them? Thanks very much.
mdict-js is a nice tool to read mdx file, do you happen to look into the possibility to pack txt files to create mdx file, like mdict-utils?
Thank you for any help and comment!
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.