/ff/


75092 – "firefox bookmarks structure"

@16ebeb229a8448ffb5edb8de351a592e Anonymous 2018-03-30 21:57:53
A high-level overview: https://developer.mozilla.org/en-US/docs/Mozilla/Tech/Places/Database
https://support.mozilla.org/en-US/questions/996823

Bookmark data is stored in places.sqlite, in the moz_bookmarks table:

CREATE TABLE moz_bookmarks (
  id INTEGER PRIMARY KEY, 
  type INTEGER, 
  fk INTEGER DEFAULT NULL, 
  parent INTEGER, 
  position INTEGER, 
  title LONGVARCHAR, 
  keyword_id INTEGER, 
  folder_type TEXT, 
  dateAdded INTEGER, 
  lastModified INTEGER, 
  guid TEXT)
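
The fk column points at moz_places, which holds the actual URLs, so to list bookmarks you join the two tables. A minimal sketch with Python's sqlite3, assuming you run it against a copy of places.sqlite taken from your profile directory (the exact path varies per profile):

#!/usr/bin/env python3
# List bookmark titles and URLs from a copy of places.sqlite.
# 'places.sqlite' below is an assumed path - point it at your own copy.
import sqlite3

conn = sqlite3.connect('places.sqlite')
rows = conn.execute("""
    SELECT b.title, p.url
    FROM moz_bookmarks b
    JOIN moz_places p ON b.fk = p.id
    WHERE b.type = 1  -- 1 = text/x-moz-place, an actual bookmark
""")
for title, url in rows:
    print(title, url)
conn.close()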


There are three types:
1 - "text/x-moz-place": actual bookmarks, plus queries like "Recently Bookmarked", "Most Visited"...
2 - "text/x-moz-place-container": this includes tags and folders
3 - "text/x-moz-place-separator" // nobody needs this shit
@f04255488ad0438abb62f7cd1a6ff77e Anonymous 2018-04-01 01:03:30
#!/usr/bin/env python3
import json
import pydash

PLACE_T = "text/x-moz-place"
CONTAINER_T = "text/x-moz-place-container"
SEPARATOR_T = "text/x-moz-place-separator"

def merge(left_node, right_node, id_offset):
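    # Recursively merge right_node's children into left_node. Ids coming
    # from the right tree are shifted by id_offset so they can't collide
    # with ids already present in the left tree.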
    left_children = left_node.get('children')
    right_children = right_node.get('children')

    if right_children is None:
        return
    if left_children is None:
        left_node['children'] = right_children
        return

    for child in right_children:
        if child['type'] in (PLACE_T, SEPARATOR_T):
            child['id'] += id_offset
            left_children += [child]
        else:  # container
            title = child['title']
            # try to find the same container in left_node
            left_container = pydash.find(left_children,
                                         lambda x: x.get('type') == CONTAINER_T and x['title'] == title)
            if left_container is not None:
                merge(left_container, child, id_offset)
            else:
                child['id'] += id_offset
                left_children += [child]

    # fix indexes
    for index, child in enumerate(left_children):
        child['index'] = index

def get_tree(file_path):
    with open(file_path, 'r', encoding="utf8") as f:
        return json.load(f)

def save(file_path, tree):
    with open(file_path, "w", encoding="utf8") as f:
        json.dump(tree, f, ensure_ascii=False, indent=2)

def traverse(tree, fun):
    fun(tree)
    children = tree.get('children')
    if children:
        for child in children:
            traverse(child, fun)

def get_max_id(tree):
    max_id = 0

    def fun(node):
        nonlocal max_id
        node_id = node['id']  # avoid shadowing the id() builtin
        max_id = node_id if node_id > max_id else max_id  # keep the running maximum
    traverse(tree, fun)
    return max_id

def find_duplicate_uri(tree):
    unique = []
    dups = []

    def fun(node):
        nonlocal unique
        nonlocal dups
        uri = node.get('uri')
        if uri is None or uri.startswith('place:'):
            return  # skip containers/separators and smart-bookmark queries
        if uri in unique:
            dups += [uri]
        else:
            unique += [uri]

    traverse(tree, fun)
    return dups

if __name__ == '__main__':
    left_tree = get_tree('esr_windows_bookmarks-2018-03-31.json')
    right_tree = get_tree('esr_debian_bookmarks-2018-03-31.json')
    id_offset = get_max_id(left_tree) + 1
    merge(left_tree, right_tree, id_offset)
    save('output.json', left_tree)

    print("duplicate URIs:")
    for dup in find_duplicate_uri(left_tree):
        print(dup)
@86fabf409f0c4457a9caf2c8359a244a Anonymous 2018-04-01 01:06:03
The script merges two bookmark backups and shows duplicate URIs.
TODO: get rid of duplicates, but merge their tags.
@48ebd15cf10f4c3d8881f1fbe4f2a158 Anonymous 2018-04-13 21:30:49
@86fabf409f0c4457a9caf2c8359a244a
#!/usr/bin/env python3
import json
import pydash
import sys

PLACE_T = "text/x-moz-place"
CONTAINER_T = "text/x-moz-place-container"
SEPARATOR_T = "text/x-moz-place-separator"

def merge(left_node, right_node, id_offset):
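    # Recursively merge right_node's children into left_node, matching
    # containers by title or guid. Ids from the right tree are shifted by
    # id_offset to avoid collisions; indexes are renumbered later, once
    # duplicates have been removed.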
    left_children = left_node.get('children')
    right_children = right_node.get('children')

    if right_children is None:
        return
    if left_children is None:
        left_node['children'] = right_children
        return

    for child in right_children:
        if child['type'] in (PLACE_T, SEPARATOR_T):
            child['id'] += id_offset
            left_children += [child]
        else:  # container
            title = child['title']
            guid = child.get('guid')
            # try to find the same container in left_node
            left_container = pydash.find(left_children,
                # fuck python with its one-line lambdas
                lambda x: x.get('type') == CONTAINER_T and (x['title'] == title or x.get('guid') == guid))

            if left_container is not None:
                merge(left_container, child, id_offset)
            else:
                child['id'] += id_offset
                left_children += [child]

def get_tree(file_path):
    with open(file_path, 'r', encoding="utf8") as f:
        return json.load(f)

def save(file_path, tree):
    with open(file_path, "w", encoding="utf8") as f:
        json.dump(tree, f, ensure_ascii=False, indent=2)

def traverse(tree, fun):
    fun(tree)
    children = tree.get('children')
    if children:
        for child in children:
            traverse(child, fun)

def get_max_id(tree):
    max_id = 0

    def fun(node):
        nonlocal max_id
        node_id = node['id']  # avoid shadowing the id() builtin
        max_id = node_id if node_id > max_id else max_id  # keep the running maximum
    traverse(tree, fun)
    return max_id

def merge_tags(left_node, right_node):
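    # Tags are stored as one comma-separated string per node; combine both
    # nodes' tags and drop case-insensitive duplicates.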
    left_tags = left_node.get('tags')
    right_tags = right_node.get('tags')

    if right_tags is None:
        return
    if left_tags is None:
        left_node['tags'] = right_tags
        return

    tags = case_insensitive_uniq(left_tags.split(',') + right_tags.split(','))
    left_node['tags'] = ','.join(tags)

def case_insensitive_uniq(data):
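    # Keep the first occurrence of each item, comparing case-insensitively,
    # e.g. ['Linux', 'linux', 'BSD'] -> ['Linux', 'BSD']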
    seen, result = set(), []
    for item in data:
        lower_item = item.lower()
        if lower_item not in seen:
            seen.add(lower_item)
            result.append(item)
    return result

def merge_duplicates(tree):
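    # Keep the first node seen for each uri, fold later duplicates' tags
    # into it, then delete the duplicates and renumber the indexes.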
    unique = []
    unique_uris = []

    def fun(node):
        nonlocal unique
        nonlocal unique_uris
        uri = node.get('uri')
        if uri is None or uri.startswith('place:'):
            return  # skip containers/separators and smart-bookmark queries
        if uri in unique_uris:
            # already seen: fold this node's tags into the original,
            # then mark this node for deletion
            orig = pydash.find(unique, lambda x: x['uri'] == uri)
            merge_tags(orig, node)
            node['delete'] = True
        else:
            unique += [node]
            unique_uris += [uri]

    def delete_marked_nodes(node):
        children = node.get('children')
        if children:
            # drop the marked children first, then renumber the survivors;
            # enumerating the old list would hand out stale indexes
            node['children'] = pydash.filter_(children, lambda x: x.get('delete') is None)

            for index, child in enumerate(node['children']):
                child['index'] = index  # fix indexes
                delete_marked_nodes(child)

    traverse(tree, fun)
    delete_marked_nodes(tree)

def find_duplicates(tree):
    unique = []
    dups = []

    def fun(node):
        nonlocal unique
        nonlocal dups
        uri = node.get('uri')
        if uri is None or uri.startswith('place:'):
            return  # skip containers/separators and smart-bookmark queries
        if uri in unique:
            dups += [node]
        else:
            unique += [uri]

    traverse(tree, fun)
    return dups

if __name__ == '__main__':
    if len(sys.argv) < 4:
        sys.stderr.write("Usage: %s /path/to/src.json /path/to/dst.json /path/to/output.json\n" % sys.argv[0])
        sys.exit(1)

    _, src_file, dst_file, out_file = sys.argv

    left_tree = get_tree(src_file)
    right_tree = get_tree(dst_file)
    id_offset = get_max_id(left_tree) + 1
    merge(left_tree, right_tree, id_offset)
    merge_duplicates(left_tree)
    save(out_file, left_tree)

    # sanity check: merge_duplicates should have removed everything
    dups = find_duplicates(left_tree)
    if dups:
        sys.stderr.write("duplicate URIs:\n")
    for dup in dups:
        sys.stderr.write(dup['uri'] + "\n")
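Usage sketch, assuming the script is saved as merge_bookmarks.py (the script name and file names are placeholders) and each profile's bookmarks were exported via Import and Backup > Backup...:

python3 merge_bookmarks.py windows.json debian.json merged.json

The merged file can then be loaded back through Import and Backup > Restore > Choose File...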