"""Merge Firefox and Opera browsing history into a borgstore key/value store.

Rows are read from each browser's SQLite history database, Opera timestamps
are converted to the Firefox time base, and each visit is stored as a JSON
list under the key ``browser/<timestamp>``.
"""
import json
import os
import sqlite3
from collections import Counter
from collections.abc import Iterator
from contextlib import closing
from glob import glob
from itertools import chain, islice, pairwise

import browser_history
from borgstore.backends.errors import BackendAlreadyExists, ObjectNotFound
from borgstore.store import Store

#%%
os.chdir("/shares/wjones/work/browser-hist")

# Human-readable variants of the queries below, kept for reference:
# opera_query = """SELECT datetime((last_visit_time/1000000)-11644473600, 'unixepoch', 'localtime')
#                  AS last_visit_time,url,title FROM urls"""
# ff_query = """SELECT datetime(visit_date/1000000,'unixepoch'), moz_places.url,title FROM moz_places
#               INNER JOIN moz_historyvisits ON moz_places.id = moz_historyvisits.place_id"""

opera_query = """SELECT last_visit_time,url,title FROM urls"""
ff_query = (
    "SELECT visit_date, moz_places.url,title FROM moz_places "
    "INNER JOIN moz_historyvisits ON moz_places.id = moz_historyvisits.place_id"
)

# Offset, in microseconds, subtracted from an Opera (WebKit) timestamp to get
# microseconds since the Unix epoch.
WEBKIT_EPOCH_OFFSET_US = 11644473600000000

# Only the first MAX_ROWS rows (in sorted order) are written to the store.
MAX_ROWS = 1000


def op_to_ff(op_ts: int) -> int:
    """Convert an Opera timestamp to the Firefox time base.

    The WebKit epoch offset is subtracted and the result is rounded down to
    a multiple of 1000.
    NOTE(review): the rounding presumably matches the precision of Firefox's
    ``visit_date`` values -- confirm.
    """
    to_nix = op_ts - WEBKIT_EPOCH_OFFSET_US
    # Python's % is non-negative for a positive modulus, so this floors
    # negative values as well.
    return to_nix - to_nix % 1000


def get_ff(db_name: str) -> Iterator[tuple]:
    """Yield ``(visit_date, url, title)`` rows from a Firefox places database.

    The database is opened read-only and the connection is closed once the
    generator is exhausted or discarded.
    """
    with closing(sqlite3.connect(f"file:{db_name}?mode=ro", uri=True)) as con:
        yield from con.execute(ff_query)


def get_op(db_name: str) -> Iterator[tuple]:
    """Yield ``(timestamp, url, title)`` rows from an Opera history database.

    Timestamps are converted with :func:`op_to_ff`.  The database is opened
    read-only and the connection is closed once the generator is exhausted
    or discarded.
    """
    with closing(sqlite3.connect(f"file:{db_name}?mode=ro", uri=True)) as con:
        for last_visit_time, url, title in con.execute(opera_query):
            yield op_to_ff(int(last_visit_time)), url, title


def _sort_key(row: tuple) -> tuple:
    """Return a sort key for a history row that tolerates NULL url/title.

    Comparing the raw tuples raises ``TypeError`` when two rows tie on the
    earlier fields and one of them holds ``None`` where the other holds a
    string; mapping ``None`` to ``""`` keeps the ordering otherwise unchanged.
    """
    timestamp, url, title = row
    return (timestamp, url or "", title or "")


ff_hist_suff = "AppData/Roaming/Mozilla/Firefox/Profiles/1jagj0rl.default-release-1653066032964/places.sqlite"
ff_hist_db = f"SDSSDHII-480G-G25-userprofile-2022-08-25-202531/{ff_hist_suff}"

op_hist_suff = "AppData/Roaming/Opera Software/Opera Stable/History"
op_hist_db = f"ST9500420AS-userprofile-2016-11-28-180409/{op_hist_suff}"

store = Store(url="file:///shares/wjones/work/browser-hist/borgstore", levels={"browser": [2]})

try:
    store.create()
except BackendAlreadyExists:
    # The store is reused across runs.
    pass

with store:
    merged = sorted(chain(get_ff(ff_hist_db), get_op(op_hist_db)), key=_sort_key)
    for timestamp, url, title in islice(merged, MAX_ROWS):
        visit = {"url": url, "title": title}
        name = f"browser/{timestamp}"

        # Value of 0 Webkit time is 11.6 billion seconds in the past, so such
        # rows end up under the key -11644473600000000.
        try:
            visits = json.loads(store.load(name))
        except ObjectNotFound:
            visits = []

        if visit in visits:
            # Already recorded under this timestamp; nothing to write.
            continue

        visits.append(visit)
        store.store(name, json.dumps(visits).encode("utf-8"))

# Scratch code kept for reference:
# for row in cur.execute(ff_query):
#     print(row)
# for (old, new) in pairwise(sorted(glob("$1"))):
#     with open(f"{old}/$2") as fp_old, open(f"{new}/$2") as fp_new:
#         print(f"{old}->{new}:", sum((Counter(fp_old) & Counter(fp_new)).values()))