Citations+-+Delete+Duplicates

In RootsMagic 4 (and maybe still in RM6), merging databases having common people with identical citations has resulted in duplicate citations for the merged person or couple and their events. These are counted in RM's database properties report but not flagged as a problem and may not be detected until running a report or editing a person. An early SQLite query provides a more comprehensive report - see the page RMGC_Properties - Query and/or the RM//trix// program. Another early query generates a spreadsheet report of all citations with a column counting duplicates - see All Citations & Dupes Count - Query, from which one could work to locate all instances and manually delete them one at a time. In March, 2014, RootsMagic Forums user Vyger posted under a 2010 topic on anomalies from merging that he was faced with a large number of duplicate citations and no easy way to get rid of them - pick up the the discussion [|here]. He was sent the script below and reported that it successfully cleaned out all 916 duplicates with no apparent side effects. As this script may be useful to others, it is posted for the first time here, four years after its initial development.


 * Caveat Emptor **: the script was written at the time of RM4 and does not check for differences between webtags for citations, a feature introduced later, and some other long-established fields, e.g. Quality. The Media system also changed from RM4 so the script may not handle differences in citation media tags correctly. Therefore, be sure to run this on a copy of your database, not your master, and review carefully before adopting the resulting database to go forward.

code format="sql" -- Citations-Dupes-Delete.sql -- 2010-02-13 ve3meo -- Deletes duplicate citations for each person, family and events -- Uses LEFT OUTER JOINS to include the most orphaned citations --    Citations for Alternate Names, added column for NameTable.IsPrimary AS Uniq to all queries --    and negated it for Alt Name and Couple.Wife queries; filter on Uniq for principal name to --     reduce multiple listing of same citation OR Uniq ISNULL for citations unlinked to persons. --    Requires a temp table because of speed degradation when incorporated in main selects; --    filtering can be done on screen in SQLiteDeveloper. -- --    QUOTE around BLOB type fields from CitationTable to display text where some SQLite --     managers merely say BLOB. -- rev 2010-12-11 -- align with RMGC_Properties procedure; prev version was too loose. -- -- BEGIN -- all Personal citations for Individual DROP TABLE IF EXISTS tmpCitations CREATE TEMP TABLE tmpCitations (CITID INTEGER, SrcID INTEGER, RIN INTEGER, Uniq INTEGER, RinCitation TEXT);

INSERT INTO tmpCitations SELECT c.CITATIONID, c.sourceid AS SrcID, n.ownerid AS RIN, n.IsPrimary AS Uniq, QUOTE(n.OwnerID) || 'Personal' || QUOTE(s.NAME) || QUOTE(s.refnumber) || QUOTE(s.actualtext) || QUOTE(s.comments) || QUOTE(CAST(s.Fields AS TEXT)) || QUOTE(mm1.mediafile) || QUOTE(c.refnumber) || QUOTE(c.actualtext) || QUOTE(c.comments) || QUOTE(CAST(c.Fields AS TEXT)) || QUOTE(mm2.mediafile) AS RinCitation FROM citationtable c  LEFT OUTER JOIN sourcetable s ON c.sourceid=s.sourceid LEFT OUTER JOIN persontable p ON c.ownerid=p.personid LEFT OUTER JOIN nametable n ON p.personid=n.ownerid LEFT OUTER JOIN medialinktable ml1 ON s.SourceID=ml1.OwnerID AND ml1.OwnerType=3 LEFT OUTER JOIN multimediatable mm1 ON ml1.MediaID=mm1.MediaID LEFT OUTER JOIN medialinktable ml2 ON c.CitationID=ml2.OwnerID AND ml2.OwnerType=4 LEFT OUTER JOIN multimediatable mm2 ON ml2.MediaID=mm2.MediaID WHERE c.ownertype=0 AND +n.IsPrimary=1 -- GROUP BY RinCitation

UNION ALL -- all Fact citations for Individual SELECT c.CITATIONID, c.sourceid AS SrcID, n.ownerid AS RIN, n.IsPrimary AS Uniq, QUOTE(n.OwnerID) || QUOTE(f.NAME) || QUOTE(e.EventID) || QUOTE(s.NAME) || QUOTE(s.refnumber) || QUOTE(s.actualtext) || QUOTE(s.comments) || QUOTE(CAST(s.Fields AS TEXT)) || QUOTE(mm1.mediafile) || QUOTE(c.refnumber) || QUOTE(c.actualtext) || QUOTE(c.comments) || QUOTE(CAST(c.Fields AS TEXT)) || QUOTE(mm2.mediafile) AS RinCitation FROM citationtable c  LEFT OUTER JOIN sourcetable s ON c.sourceid=s.sourceid LEFT OUTER JOIN eventtable e ON c.ownerid=e.eventid LEFT OUTER JOIN persontable p ON e.ownerid=p.personid LEFT OUTER JOIN nametable n ON p.personid=n.ownerid LEFT OUTER JOIN facttypetable f ON e.eventtype=f.facttypeid LEFT OUTER JOIN medialinktable ml1 ON s.SourceID=ml1.OwnerID AND ml1.OwnerType=3 LEFT OUTER JOIN multimediatable mm1 ON ml1.MediaID=mm1.MediaID LEFT OUTER JOIN medialinktable ml2 ON c.CitationID=ml2.OwnerID AND ml2.OwnerType=4 LEFT OUTER JOIN multimediatable mm2 ON ml2.MediaID=mm2.MediaID WHERE c.ownertype=2 AND e.ownertype=0 AND f.ownertype=0 AND +n.IsPrimary=1 -- GROUP BY RinCitation

UNION ALL -- all Couple citations for Father|Husband|Partner 1 SELECT c.CITATIONID, c.sourceid AS SrcID, n.ownerid AS RIN, n.IsPrimary AS Uniq, QUOTE(n.OwnerID) || QUOTE(f.NAME) || QUOTE(e.EventID) || s.NAME || s.refnumber || QUOTE(s.actualtext) || QUOTE(s.comments) || (s.Fields) || QUOTE(mm1.mediafile) || c.refnumber || QUOTE(c.actualtext) || QUOTE(c.comments) || (c.Fields) || QUOTE(mm2.mediafile) AS RinCitation FROM citationtable c  LEFT OUTER JOIN sourcetable s ON c.sourceid=s.sourceid LEFT OUTER JOIN familytable fm ON c.ownerid=fm.FamilyID LEFT OUTER JOIN persontable p ON fm.fatherid=p.personid LEFT OUTER JOIN nametable n ON p.personid=n.ownerid LEFT OUTER JOIN eventtable e ON e.ownerid=fm.familyid LEFT OUTER JOIN facttypetable f ON e.eventtype=f.facttypeid LEFT OUTER JOIN medialinktable ml1 ON s.SourceID=ml1.OwnerID AND ml1.OwnerType=3 LEFT OUTER JOIN multimediatable mm1 ON ml1.MediaID=mm1.MediaID LEFT OUTER JOIN medialinktable ml2 ON c.CitationID=ml2.OwnerID AND ml2.OwnerType=4 LEFT OUTER JOIN multimediatable mm2 ON ml2.MediaID=mm2.MediaID WHERE c.ownertype=1 AND e.ownertype=1 AND f.ownertype=1 AND +n.IsPrimary=1 -- GROUP BY RinCitation

UNION ALL -- Citations for Alternate Names SELECT c.CITATIONID, c.sourceid AS SrcID, n.ownerid AS RIN, n.IsPrimary AS Uniq, n.OwnerID || 'Alternate Name' || s.NAME || s.refnumber || QUOTE(s.actualtext) || QUOTE(s.comments) || (s.Fields) || QUOTE(mm1.mediafile) || c.refnumber || QUOTE(c.actualtext) || QUOTE(c.comments) || (c.Fields) || QUOTE(mm2.mediafile) AS RinCitation FROM citationtable c  LEFT OUTER JOIN sourcetable s ON c.sourceid=s.sourceid LEFT OUTER JOIN nametable n ON n.nameid=c.ownerid LEFT OUTER JOIN medialinktable ml1 ON s.SourceID=ml1.OwnerID AND ml1.OwnerType=3 LEFT OUTER JOIN multimediatable mm1 ON ml1.MediaID=mm1.MediaID LEFT OUTER JOIN medialinktable ml2 ON c.CitationID=ml2.OwnerID AND ml2.OwnerType=4 LEFT OUTER JOIN multimediatable mm2 ON ml2.MediaID=mm2.MediaID WHERE c.ownertype=7 AND +n.IsPrimary=0 -- GROUP BY RinCitation ORDER BY rincitation

-- Delete Duplicates DELETE FROM CitationTable WHERE CitationID IN (   SELECT c.CitID    FROM tmpcitations C INNER JOIN tmpcitations C1 USING (rincitation) WHERE C.RowID = C1.ROWID+1    ORDER BY c.CitID   )

-- Count duplicates --SELECT '- Duplicate Citations', COUNT(1), 'identical in most respects, cluttering reports' -- FROM tmpcitations C INNER JOIN tmpcitations C1 USING (rincitation) WHERE C.RowID = C1.ROWID+1 --ORDER BY c.CitID --;

-- List Duplicates --SELECT c.CitID -- FROM tmpcitations C INNER JOIN tmpcitations C1 USING (rincitation) WHERE C.RowID = C1.ROWID+1 --ORDER BY c.CitID --;

code