User:Slaporte/US Code

The US Code Formatting Tool processes a formatted ASCII file of the US Code.
 * See a demo of this tool
 * Download an ASCII file of the US Code

Version history
The current version of the tool processes the HTML files downloadable from FDSys.
 * 8/16/2010 - Fixed revisionIndex and related functions; improved documentation
 * 8/14/2010 - Improved documentation; renamed functions & variables
 * 8/12/2010 - New code with docs
 * 7/6/2010
 * 7/1/2010 - Demo code

Code
","|}\n ",$html);	$html = preg_replace("/(.*)/","$1",$html);	/**	* Replace html comment with PDFPage to Template:page	*/	//$html = preg_replace("//"," ",$html);	/**	* run expcite when the expcite comment is found	*/	if(preg_match("//",$html,$expcite)){		$cite = expcite($expcite[1]);	}	$html = str_replace(" "," ",$html);	$split = preg_split("//",$html,0,PREG_SPLIT_DELIM_CAPTURE);	foreach($split as $k => $v){		if($k == 0){			$txt[] = array("intro", $v);		}elseif(is_odd($k)){			$txt[] = array($v, trim($split[$k+1]));		}	}	/**	* Build footnotes section	*/	if(isset($footnotes)){		foreach($footnotes as $note){			if(isset($sectionFootnotes)){					$sectionFootnotes = $sectionFootnotes."\n".$note;				}else{					$sectionFootnotes = $note;				}		}	}	if(isset($sectionFootnotes)){		$sectionFootnotes = "\n\n===Foot notes===\n\n".$sectionFootnotes;	}	/**	* Build name	*/	if(!isset($nameTitle)){ $nameTitle = ""; }	if(!isset($nameSubtitle)){ $nameSubtitle = ""; }	if(!isset($nameChapter)){ $nameChapter = ""; }	if(!isset($nameSubchapter)){ $nameSubchapter = ""; }	if(!isset($namePart)){ $namePart = ""; }	if(!isset($nameSec)){ $nameSec = ""; }	if($cite["titleNo"] != false){		$nameTitle = "/Title ".$cite['titleNo']; $display = $nameTitle; }else{ $nameTitle = ""; }	if($cite["subtitleNo"] != false){ $nameTitle = "/Subtitle ".$cite['subtitleNo']; if(isset($nameSubtitle)){ $display = $nameSubtitle; }	}else{ $nameSubtitle = ""; }	if($cite["chapNo"] != false){ $nameChapter = "/Chapter ".$cite['chapNo']; $display = $nameChapter; }else{ $nameChapter = ""; }	if($cite["subchapNo"] != false){ $nameSubchapter = "/Subchapter ".$cite['subchapNo']; $display = $nameSubchapter; }else{ $nameSubchapter = ""; }	if($cite["partNo"] != false){ $namePart = "/Part ".$cite['partNo']; $display = $namePart; }else{ $namePart = ""; }	if($cite["secNo"] != false){ $nameSec = "/Sec. 
".$cite['secNo']; $display = $nameSec; }else{ $nameSec = ""; }	$name = $nameTitle.$nameSubtitle.$nameChapter.$nameSubchapter.$namePart.$nameSec; /**	* print everything out */	/**	* the statute's page */	foreach($txt as $section) { if($section[0] == "sourcecredit"){ $revision = determineRevisionKey($section[1]); }	}	/**	* Build $fulltext, the body of the section */	if(!isset($fulltext)){ $fulltext = ""; }	$prevYearCheck = compareAmendments($name, $revision, $revisionIndex); if(preg_match("/[0-9]+/",$prevYearCheck,$prevYearFound)){ /**		* if content already exists, transclude it rather than uploading it		*/ print " \nUnited States Code (".$currentyear.")".$name." \n\n \n\n\n"; }else{ foreach($txt as $section){ $sectionlist[] = $section[0]." "; /**			* go through each section and get headers and text */			switch($section[0]){ case "head ": $fulltext = $fulltext. getFieldHeadThree($section[1]); break; case "statute": $fulltext = $fulltext. indentStatute($section[1]); break; case "analysis": $fulltext = $fulltext. "\n{| class=wikitable".getAnalysisSection($section[1]); break; case "sourcecredit": $revision = determineRevisionKey($section[1]); $fulltext = $fulltext. "\n\n===Source(s)===\n\n".getSourcecreditParagraph($section[1]); break; case "historicalandrevision-note": $fulltext = $fulltext. "\n\n===Historical and revision notes===\n\n".getP($section[1]); break; case "miscellaneous-note": $fulltext = $fulltext. "\n\n===Miscellaneous notes===\n\n".getP($section[1]); break; case "amendment-note": $fulltext = $fulltext. "\n\n===Amendment notes===\n\n".getP($section[1]); break; case "shorttitle-amendment-note": $fulltext = $fulltext. "\n\n===Short title amendment notes===\n\n".getP($section[1]); break; case "effectivedate-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "savingsprovision-note": $fulltext = $fulltext. "\n\n===Savings provision===\n\n".getP($section[1]); break; case "titlehead": $fulltext = $fulltext. 
getFieldHeadThree($section[1]) .getFieldHeadFour($section[1]).getP($section[1]); break; case "structuralhead": $fulltext = $fulltext. getFieldHeadThree($section[1]) .getFieldHeadFour($section[1]).getP($section[1]); break; case "executivedate-amendment-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "shorttitle-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "codification-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "changeofname-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "priorprovisions-note": $footnotes[] = getFieldHeadFour($section[1]).getP($section[1]); break; case "referenceintext-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "function-transfer-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "repealedhead": $fulltext = $fulltext. getFieldHeadThree($section[1]) .getFieldHeadFour($section[1]).getP($section[1]); break; case "repealsummary": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "effectivedate-repeal-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "futureamendment-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "terminationdate-amendment-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "effectivedate-termination-amendment-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "omittedhead": $fulltext = $fulltext. getFieldHeadThree($section[1]). getFieldHeadFour($section[1]).getP($section[1]); break; case "effectivedate-terminationdate-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "terminationdate-note": $fulltext = $fulltext. 
getFieldHeadFour($section[1]).getP($section[1]); break; case "function-delegation-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "repeal-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "construction-amendment-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "construction-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "function-abolition-construction": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "titleenactmentcredit": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "terminationdate-repeal-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; case "function-transfer-repeal-savingsclause-similarprovisions-note": $fulltext = $fulltext. getFieldHeadFour($section[1]).getP($section[1]); break; }		}		/**		* add start marker, title, and */		print " \nUnited States Code (".$currentyear.")".$name." \n&lt;noinclude>\n\n&lt;/noinclude>\n\n"; $fulltext = $fulltext. $sectionFootnotes; /** 		* add a Last-amended category, for checking a section against a previous year */		if(isset($revision)) { $fulltext = $fulltext. "\n"; }else{ $fulltext = $fulltext. "";		}		$fulltext = $fulltext. "\n\n"; }	/**	* statute's talk page */	$fulltext = $fulltext. "\n \nTalk:United States Code (".$currentyear.")".$name." \n\n \n\n \n"; /**	* create citation redirect, for convenient linking. */	$nameTitle = $nameSubtitle = $nameChapter = $nameSubchapter = $namePart = $nameSec = $display = false; if($nameSec != false) { $fulltext = $fulltext."\n\n".$title." U.S.C. &sect; ".$cite['secNo']." 
(".$currentyear.")\n\n\n#REDIRECT United States Code (".$currentyear.")".$nameTitle.$nameChapter.$nameSubchapter.$nameSec."\n\n\n\n"; }	$sectionSize = strlen($fulltext); print $fulltext; /**	* generate master list of sections */	global $masterList; $masterList["United States Code (".$data[2].")".$name] = $revision; return $sectionSize; }

print "";

/**
 * Pipe-separated list of source URLs taken from the "u" query parameter.
 *
 * A missing or non-string "u" (for example ?u[]=x) is treated as an empty
 * string so that explode() always receives a string. The result in that case
 * is array(""), the same value explode() yielded for a missing parameter
 * before, but without the undefined-index notice or a TypeError.
 *
 * NOTE(security): these values come straight from the request and are used as
 * fetch targets by the cURL loop further down; consider restricting them to
 * the expected host before exposing this script publicly.
 */
$rawUrlList = (isset($_GET["u"]) && is_string($_GET["u"])) ? $_GET["u"] : "";
$us = explode("|", $rawUrlList);

?> Format Title of U.S. Code  fieldset { width:70em; } Enter URL of HTML file to format URL ">

Submit

Create bot file

<?php

/**
 * Assemble the full code
 */
// Main loop: for every URL in $us, fetch the document with cURL, split it into
// sections, print each section through formatFDSys(), and finally print the
// revision key produced by RevisionIndex($masterList).
// NOTE(review): `$ch = curl_init;` has no call parentheses, and the
// `$sections = preg_split(...` statement is never closed before its `;` --
// part of the original statement(s) appears to be missing from this copy, so
// the block does not parse as written. TODO: restore from the original script.
// NOTE(review): $data and $splitSize are not assigned anywhere in this loop;
// presumably they were set by the missing portion -- confirm.
// NOTE(review): the `if (!$html)` cURL failure check only runs after $html has
// already been split and printed; it likely belongs right after curl_exec().
foreach($us as $u){ $targetURL = $u; $userAgent = '';

/**	* Open up each file specified in $us. Multiple files are separated by | */	$ch = curl_init; curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); curl_setopt($ch, CURLOPT_URL,$targetURL); curl_setopt($ch, CURLOPT_FAILONERROR, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_AUTOREFERER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER,true); curl_setopt($ch, CURLOPT_TIMEOUT, 100); $html= curl_exec($ch); $sections = preg_split("/( \nAUTHORITIES-PUBLICATION-NAME: ".$data[0]." \nAUTHORITIES-PUBLICATION-ID: ".$data[1]." \nAUTHORITIES-PUBLICATION-YEAR: ".$data[2]." \nAUTHORITIES-LAWS-ENACTED-THROUGH-DATE: ".$data[3]." \nSEARCHABLE-LAWS-ENACTED-THROUGH-DATE: ".$data[4]." \n<li>AUTHORITIES-USC-TITLE-NAME: ".$data[5]."</li> \n<li>AUTHORITIES-USC-TITLE-ENUM: ".$data[6]."</li> \n<li>AUTHORITIES-USC-TITLE-STATUS: ".$data[7]."</li> \n<li>CONVERSION-PROGRAM: ".$data[8]."</li> \n<li>CONVERSION-DATETIME: ".$data[9]."</li> \n</ul>"; /**	* get a list of names, for the "Next" and "Previous" sections */	foreach($sections as $section){ $names[] = getNames($section,$data[2]); }	$sizeTotal = 0; /**	* print each section via formatFDSys */	print " "; foreach($sections as $key=>$section){ if(is_odd($key)){ /**				* if it is the last section in the title, there is no "next" */				if(isset($names[$key+3])){ $sectionSize = formatFDSys($section.$sections[$key+1],$data,$names[$key-1],$names[$key+3]); unset($sections[$key]); $sizeTotal = $sizeTotal + $sectionSize; }else{ $sectionSize = formatFDSys($section.$sections[$key+1],$data,$names[$key-1],""); unset($sections[$key]); $sizeTotal = $sizeTotal + $sectionSize; }				/**				* Separate each section into manageable chunks of text */				if($sizeTotal >= $splitSize){ print " \nSplit at: ".$sizeTotal."b \n "; $sizeTotal = 0; }			}		if($key == 0){ $name[] = "United States Code (".$data[2].")".formatFDSys($section.$sections[$key], $data, "", $names[$key+3]); unset($sections[$key]); }	}	print " "; if (!$html) { echo "\ncURL 
error number:" .curl_errno($ch)."\ncURL error:". curl_error($ch); exit; }	/**	* Add the year to the $masterList */	print " Revision key for Title ".$data[5].", ".$data[2]." \n<textarea name='revisionkey' cols=100 rows=30>\n".RevisionIndex($masterList)." \n ";

}

?>